sandboxy-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. sandboxy/__init__.py +3 -0
  2. sandboxy/agents/__init__.py +21 -0
  3. sandboxy/agents/base.py +66 -0
  4. sandboxy/agents/llm_prompt.py +308 -0
  5. sandboxy/agents/loader.py +222 -0
  6. sandboxy/api/__init__.py +5 -0
  7. sandboxy/api/app.py +76 -0
  8. sandboxy/api/routes/__init__.py +1 -0
  9. sandboxy/api/routes/agents.py +92 -0
  10. sandboxy/api/routes/local.py +1388 -0
  11. sandboxy/api/routes/tools.py +106 -0
  12. sandboxy/cli/__init__.py +1 -0
  13. sandboxy/cli/main.py +1196 -0
  14. sandboxy/cli/type_detector.py +48 -0
  15. sandboxy/config.py +49 -0
  16. sandboxy/core/__init__.py +1 -0
  17. sandboxy/core/async_runner.py +824 -0
  18. sandboxy/core/mdl_parser.py +441 -0
  19. sandboxy/core/runner.py +599 -0
  20. sandboxy/core/safe_eval.py +165 -0
  21. sandboxy/core/state.py +234 -0
  22. sandboxy/datasets/__init__.py +20 -0
  23. sandboxy/datasets/loader.py +193 -0
  24. sandboxy/datasets/runner.py +442 -0
  25. sandboxy/errors.py +166 -0
  26. sandboxy/local/context.py +235 -0
  27. sandboxy/local/results.py +173 -0
  28. sandboxy/logging.py +31 -0
  29. sandboxy/mcp/__init__.py +25 -0
  30. sandboxy/mcp/client.py +360 -0
  31. sandboxy/mcp/wrapper.py +99 -0
  32. sandboxy/providers/__init__.py +34 -0
  33. sandboxy/providers/anthropic_provider.py +271 -0
  34. sandboxy/providers/base.py +123 -0
  35. sandboxy/providers/http_client.py +101 -0
  36. sandboxy/providers/openai_provider.py +282 -0
  37. sandboxy/providers/openrouter.py +958 -0
  38. sandboxy/providers/registry.py +199 -0
  39. sandboxy/scenarios/__init__.py +11 -0
  40. sandboxy/scenarios/comparison.py +491 -0
  41. sandboxy/scenarios/loader.py +262 -0
  42. sandboxy/scenarios/runner.py +468 -0
  43. sandboxy/scenarios/unified.py +1434 -0
  44. sandboxy/session/__init__.py +21 -0
  45. sandboxy/session/manager.py +278 -0
  46. sandboxy/tools/__init__.py +34 -0
  47. sandboxy/tools/base.py +127 -0
  48. sandboxy/tools/loader.py +270 -0
  49. sandboxy/tools/yaml_tools.py +708 -0
  50. sandboxy/ui/__init__.py +27 -0
  51. sandboxy/ui/dist/assets/index-CgAkYWrJ.css +1 -0
  52. sandboxy/ui/dist/assets/index-D4zoGFcr.js +347 -0
  53. sandboxy/ui/dist/index.html +14 -0
  54. sandboxy/utils/__init__.py +3 -0
  55. sandboxy/utils/time.py +20 -0
  56. sandboxy-0.0.1.dist-info/METADATA +241 -0
  57. sandboxy-0.0.1.dist-info/RECORD +60 -0
  58. sandboxy-0.0.1.dist-info/WHEEL +4 -0
  59. sandboxy-0.0.1.dist-info/entry_points.txt +3 -0
  60. sandboxy-0.0.1.dist-info/licenses/LICENSE +201 -0
sandboxy/datasets/runner.py ADDED
@@ -0,0 +1,442 @@
+"""Dataset runner for multi-case benchmarking."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import time
+from dataclasses import dataclass, field
+from typing import Any
+
+from sandboxy.datasets.loader import Dataset, TestCase
+from sandboxy.scenarios.unified import (
+    RunResult,
+    UnifiedRunner,
+    UnifiedScenarioSpec,
+)
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class CaseResult:
+    """Result for a single test case."""
+
+    case_id: str
+    expected: list[str] = field(default_factory=list)
+    actual_outcome: str | None = None
+    passed: bool = False
+    goal_score: float = 0.0
+    max_score: float = 0.0
+    percentage: float = 0.0
+    run_result: RunResult | None = None
+    failure_reason: str | None = None
+    latency_ms: int = 0
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "case_id": self.case_id,
+            "expected": self.expected,
+            "actual_outcome": self.actual_outcome,
+            "passed": self.passed,
+            "goal_score": self.goal_score,
+            "max_score": self.max_score,
+            "percentage": self.percentage,
+            "failure_reason": self.failure_reason,
+            "latency_ms": self.latency_ms,
+            "run_result": self.run_result.to_dict() if self.run_result else None,
+        }
+
+
+@dataclass
+class DatasetResult:
+    """Aggregated results across all cases."""
+
+    scenario_id: str
+    model: str
+    dataset_id: str
+    total_cases: int = 0
+    passed_cases: int = 0
+    failed_cases: int = 0
+    pass_rate: float = 0.0
+    avg_score: float = 0.0
+    avg_percentage: float = 0.0
+    case_results: list[CaseResult] = field(default_factory=list)
+    by_expected: dict[str, dict[str, int]] = field(default_factory=dict)
+    total_time_ms: int = 0
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "scenario_id": self.scenario_id,
+            "model": self.model,
+            "dataset_id": self.dataset_id,
+            "total_cases": self.total_cases,
+            "passed_cases": self.passed_cases,
+            "failed_cases": self.failed_cases,
+            "pass_rate": self.pass_rate,
+            "avg_score": self.avg_score,
+            "avg_percentage": self.avg_percentage,
+            "by_expected": self.by_expected,
+            "total_time_ms": self.total_time_ms,
+            "case_results": [c.to_dict() for c in self.case_results],
+        }
+
+    def format_table(self) -> str:
+        """Format results as a table string."""
+        lines = [
+            f"Dataset Benchmark: {self.scenario_id}",
+            f"Model: {self.model}",
+            f"Dataset: {self.dataset_id} ({self.total_cases} cases)",
+            "",
+            "Results Summary",
+            "─" * 50,
+            f"Passed: {self.passed_cases}/{self.total_cases} ({self.pass_rate * 100:.1f}%)",
+            f"Avg Score: {self.avg_score:.1f} ({self.avg_percentage:.1f}%)",
+            f"Time: {self.total_time_ms / 1000:.1f}s ({self.total_time_ms / max(self.total_cases, 1):.0f}ms/case)",
+        ]
+
+        # By expected outcome
+        if self.by_expected:
+            lines.append("")
+            lines.append("By Expected Outcome:")
+            for expected, counts in sorted(self.by_expected.items()):
+                total = counts.get("passed", 0) + counts.get("failed", 0)
+                passed = counts.get("passed", 0)
+                pct = (passed / total * 100) if total > 0 else 0
+                lines.append(f" {expected}: {passed}/{total} ({pct:.1f}%)")
+
+        # Failed cases
+        failed = [c for c in self.case_results if not c.passed]
+        if failed:
+            lines.append("")
+            lines.append("Failed Cases:")
+            for case in failed[:10]:  # Show first 10 failed
+                reason = (
+                    case.failure_reason or f"expected {case.expected}, got {case.actual_outcome}"
+                )
+                lines.append(f" ✗ {case.case_id}: {reason}")
+            if len(failed) > 10:
+                lines.append(f" ... and {len(failed) - 10} more")
+
+        return "\n".join(lines)
+
+
+async def run_dataset(
+    scenario: UnifiedScenarioSpec,
+    model: str,
+    dataset: Dataset,
+    max_turns: int = 20,
+    max_tokens: int = 1024,
+    temperature: float = 0.7,
+) -> DatasetResult:
+    """Run scenario against all test cases in dataset."""
+    logger.info(
+        "Starting dataset run: scenario=%s, model=%s, dataset=%s, cases=%d",
+        scenario.id,
+        model,
+        dataset.id,
+        len(dataset.cases),
+    )
+    start_time = time.perf_counter()
+    runner = UnifiedRunner()
+    case_results: list[CaseResult] = []
+    by_expected: dict[str, dict[str, int]] = {}
+
+    for case in dataset.cases:
+        case_result = await _run_case(
+            runner, scenario, model, case, max_turns, max_tokens, temperature
+        )
+        case_results.append(case_result)
+
+        for exp in case.expected:
+            if exp not in by_expected:
+                by_expected[exp] = {"passed": 0, "failed": 0}
+            if case_result.passed:
+                by_expected[exp]["passed"] += 1
+            else:
+                by_expected[exp]["failed"] += 1
+
+    total_cases = len(case_results)
+    passed_cases = sum(1 for c in case_results if c.passed)
+    total_score = sum(c.goal_score for c in case_results)
+    total_pct = sum(c.percentage for c in case_results)
+
+    total_time_ms = int((time.perf_counter() - start_time) * 1000)
+
+    logger.info(
+        "Dataset run completed: passed=%d/%d (%.1f%%), time=%dms",
+        passed_cases,
+        total_cases,
+        (passed_cases / total_cases * 100) if total_cases > 0 else 0.0,
+        total_time_ms,
+    )
+
+    return DatasetResult(
+        scenario_id=scenario.id,
+        model=model,
+        dataset_id=dataset.id,
+        total_cases=total_cases,
+        passed_cases=passed_cases,
+        failed_cases=total_cases - passed_cases,
+        pass_rate=passed_cases / total_cases if total_cases > 0 else 0.0,
+        avg_score=total_score / total_cases if total_cases > 0 else 0.0,
+        avg_percentage=total_pct / total_cases if total_cases > 0 else 0.0,
+        case_results=case_results,
+        by_expected=by_expected,
+        total_time_ms=total_time_ms,
+    )
+
+
+# Known action outcomes that represent final decisions (not process states)
+# These are prioritized when determining actual_outcome
+ACTION_OUTCOMES = frozenset(
+    {
+        # Escalation actions
+        "sar_filed",
+        "aps_referred",
+        "hold_placed",
+        "alert_cleared",
+        # Common action patterns
+        "approved",
+        "denied",
+        "rejected",
+        "escalated",
+        "flagged",
+        "blocked",
+        "completed",
+        "referred",
+    }
+)
+
+
+def _determine_actual_outcome(
+    final_state: dict[str, Any],
+    scenario: UnifiedScenarioSpec,
+    eval_goals: list[Any],
+) -> str | None:
+    """Determine the actual outcome from run results.
+
+    Priority order:
+    1. Scenario-defined outcome goals that were achieved
+    2. Known action outcomes from final_state
+    3. None if no meaningful outcome found
+
+    Process states like 'checked_user', 'checked_transaction' are excluded
+    as they represent investigation steps, not final outcomes.
+    """
+    # 1. Check scenario-defined outcome goals (most authoritative)
+    if scenario.evaluation and scenario.evaluation.goals:
+        outcome_goal_ids = {g.id for g in scenario.evaluation.goals if g.outcome}
+        for eval_goal in eval_goals:
+            if eval_goal.id in outcome_goal_ids and eval_goal.achieved:
+                return eval_goal.id
+
+    # 2. Check final_state for known action outcomes
+    achieved_actions = [
+        key for key, value in final_state.items() if value is True and key in ACTION_OUTCOMES
+    ]
+    if achieved_actions:
+        # Return first achieved action (could also prioritize by a defined order)
+        return achieved_actions[0]
+
+    # 3. Check final_state for any custom outcome that looks like an action
+    # (ends with _filed, _placed, _referred, _cleared, etc.)
+    action_suffixes = ("_filed", "_placed", "_referred", "_cleared", "_approved", "_denied")
+    for key, value in final_state.items():
+        if value is True and any(key.endswith(suffix) for suffix in action_suffixes):
+            return key
+
+    return None
+
+
+async def _run_case(
+    runner: UnifiedRunner,
+    scenario: UnifiedScenarioSpec,
+    model: str,
+    case: TestCase,
+    max_turns: int,
+    max_tokens: int,
+    temperature: float,
+) -> CaseResult:
+    """Run a single test case."""
+    start_time = time.perf_counter()
+
+    try:
+        result = await runner.run(
+            scenario=scenario,
+            model=model,
+            variables=case.variables,
+            max_turns=max_turns,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            tool_overrides=case.tool_responses,
+            expected_outcome=case.expected[0] if case.expected else None,
+        )
+
+        latency_ms = int((time.perf_counter() - start_time) * 1000)
+
+        if result.error:
+            return CaseResult(
+                case_id=case.id,
+                expected=case.expected,
+                passed=False,
+                failure_reason=f"Run error: {result.error}",
+                latency_ms=latency_ms,
+            )
+
+        passed = False
+        actual_outcome: str | None = None
+        failure_reason: str | None = None
+
+        if result.evaluation:
+            goal_score = result.evaluation.total_score
+            max_score = result.evaluation.max_score
+            percentage = result.evaluation.percentage
+
+            final_state = result.final_state or {}
+
+            # Determine actual outcome with proper priority:
+            # 1. First, check evaluation goals marked as outcome=true (scenario-defined outcomes)
+            # 2. Then check final_state for known action outcomes
+            # 3. Process states (checked_*, etc.) are not considered outcomes
+            actual_outcome = _determine_actual_outcome(
+                final_state=final_state,
+                scenario=scenario,
+                eval_goals=result.evaluation.goals,
+            )
+
+            if case.expected:
+                # First check final_state for expected outcomes
+                for expected_id in case.expected:
+                    if final_state.get(expected_id) is True:
+                        passed = True
+                        actual_outcome = expected_id
+                        break
+
+                # Fallback: check evaluation goals
+                if not passed:
+                    for expected_id in case.expected:
+                        for eval_goal in result.evaluation.goals:
+                            if eval_goal.id == expected_id and eval_goal.achieved:
+                                passed = True
+                                actual_outcome = expected_id
+                                break
+                        if passed:
+                            break
+
+                if not passed:
+                    expected_str = " or ".join(case.expected)
+                    if actual_outcome:
+                        failure_reason = f"expected {expected_str}, got {actual_outcome}"
+                    else:
+                        failure_reason = f"expected {expected_str}, no outcome achieved"
+            else:
+                # No expected outcome - pass based on score
+                passed = percentage >= 50.0
+                if not passed:
+                    failure_reason = f"score too low ({percentage:.1f}%)"
+
+            return CaseResult(
+                case_id=case.id,
+                expected=case.expected,
+                actual_outcome=actual_outcome,
+                passed=passed,
+                goal_score=goal_score,
+                max_score=max_score,
+                percentage=percentage,
+                run_result=result,
+                failure_reason=failure_reason,
+                latency_ms=latency_ms,
+            )
+        return CaseResult(
+            case_id=case.id,
+            expected=case.expected,
+            passed=False,
+            failure_reason="No evaluation configured",
+            run_result=result,
+            latency_ms=latency_ms,
+        )
+
+    except Exception:
+        latency_ms = int((time.perf_counter() - start_time) * 1000)
+        logger.exception("Exception running case %s", case.id)
+        return CaseResult(
+            case_id=case.id,
+            expected=case.expected,
+            passed=False,
+            failure_reason="Exception: see logs for details",
+            latency_ms=latency_ms,
+        )
+
+
+async def run_dataset_parallel(
+    scenario: UnifiedScenarioSpec,
+    model: str,
+    dataset: Dataset,
+    max_turns: int = 20,
+    max_tokens: int = 1024,
+    temperature: float = 0.7,
+    max_concurrent: int = 5,
+) -> DatasetResult:
+    """Run dataset with parallel case execution."""
+    logger.info(
+        "Starting parallel dataset run: scenario=%s, model=%s, dataset=%s, cases=%d, concurrency=%d",
+        scenario.id,
+        model,
+        dataset.id,
+        len(dataset.cases),
+        max_concurrent,
+    )
+    start_time = time.perf_counter()
+    runner = UnifiedRunner()
+    semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def run_with_semaphore(case: TestCase) -> CaseResult:
+        async with semaphore:
+            return await _run_case(
+                runner, scenario, model, case, max_turns, max_tokens, temperature
+            )
+
+    case_results = await asyncio.gather(*[run_with_semaphore(case) for case in dataset.cases])
+
+    by_expected: dict[str, dict[str, int]] = {}
+    for case, case_result in zip(dataset.cases, case_results, strict=True):
+        for exp in case.expected:
+            if exp not in by_expected:
+                by_expected[exp] = {"passed": 0, "failed": 0}
+            if case_result.passed:
+                by_expected[exp]["passed"] += 1
+            else:
+                by_expected[exp]["failed"] += 1
+
+    total_cases = len(case_results)
+    passed_cases = sum(1 for c in case_results if c.passed)
+    total_score = sum(c.goal_score for c in case_results)
+    total_pct = sum(c.percentage for c in case_results)
+
+    total_time_ms = int((time.perf_counter() - start_time) * 1000)
+
+    logger.info(
+        "Parallel dataset run completed: passed=%d/%d (%.1f%%), time=%dms",
+        passed_cases,
+        total_cases,
+        (passed_cases / total_cases * 100) if total_cases > 0 else 0.0,
+        total_time_ms,
+    )
+
+    return DatasetResult(
+        scenario_id=scenario.id,
+        model=model,
+        dataset_id=dataset.id,
+        total_cases=total_cases,
+        passed_cases=passed_cases,
+        failed_cases=total_cases - passed_cases,
+        pass_rate=passed_cases / total_cases if total_cases > 0 else 0.0,
+        avg_score=total_score / total_cases if total_cases > 0 else 0.0,
+        avg_percentage=total_pct / total_cases if total_cases > 0 else 0.0,
+        case_results=list(case_results),
+        by_expected=by_expected,
+        total_time_ms=total_time_ms,
+    )
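
Usage note (not part of the published wheel): a minimal sketch of how the dataset runner above might be driven. It assumes the caller already has a loaded UnifiedScenarioSpec and a Dataset (the loader modules are listed above but their APIs are not shown here); the helper name benchmark, the concurrency settings, and the model id placeholder are illustrative only.

import asyncio

from sandboxy.datasets.loader import Dataset
from sandboxy.datasets.runner import run_dataset_parallel
from sandboxy.scenarios.unified import UnifiedScenarioSpec


async def benchmark(scenario: UnifiedScenarioSpec, dataset: Dataset, model: str) -> None:
    # Run every case with at most 5 cases in flight at once; each case yields a CaseResult.
    result = await run_dataset_parallel(
        scenario=scenario,
        model=model,
        dataset=dataset,
        max_turns=20,
        max_tokens=1024,
        temperature=0.7,
        max_concurrent=5,
    )
    print(result.format_table())  # human-readable summary (first 10 failures listed)
    # Structured form for machine consumption, mirroring DatasetResult.to_dict()
    failed = [c.case_id for c in result.case_results if not c.passed]
    print("failed cases:", failed)


# asyncio.run(benchmark(scenario, dataset, "model-id-accepted-by-your-provider"))
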
sandboxy/errors.py ADDED
@@ -0,0 +1,166 @@
+"""Structured error types for Sandboxy.
+
+Provides typed exceptions with error codes and details for better
+error handling and debugging.
+"""
+
+from typing import Any
+
+
+class SandboxyError(Exception):
+    """Base exception for Sandboxy operations.
+
+    All Sandboxy errors include:
+    - A human-readable message
+    - An error code for programmatic handling
+    - Optional details dictionary for debugging
+    """
+
+    code: str = "SANDBOXY_ERROR"
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        code: str | None = None,
+        details: dict[str, Any] | None = None,
+    ):
+        super().__init__(message)
+        self.code = code or self.__class__.code
+        self.details = details or {}
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for API responses."""
+        return {
+            "error": self.code,
+            "message": str(self),
+            "details": self.details,
+        }
+
+
+class ConfigurationError(SandboxyError):
+    """Error in configuration or environment setup."""
+
+    code = "CONFIG_ERROR"
+
+
+class ModuleError(SandboxyError):
+    """Error related to module operations."""
+
+    code = "MODULE_ERROR"
+
+
+class ModuleParseError(ModuleError):
+    """Error parsing a module definition (MDL/YAML)."""
+
+    code = "MODULE_PARSE_ERROR"
+
+    def __init__(self, message: str, *, path: str | None = None, line: int | None = None):
+        details = {}
+        if path:
+            details["path"] = path
+        if line:
+            details["line"] = line
+        super().__init__(message, details=details)
+
+
+class ModuleNotFoundError(ModuleError):
+    """Module not found."""
+
+    code = "MODULE_NOT_FOUND"
+
+    def __init__(self, module_id: str):
+        super().__init__(f"Module not found: {module_id}", details={"module_id": module_id})
+
+
+class ToolError(SandboxyError):
+    """Error related to tool operations."""
+
+    code = "TOOL_ERROR"
+
+
+class ToolLoadError(ToolError):
+    """Error loading a tool."""
+
+    code = "TOOL_LOAD_ERROR"
+
+    def __init__(self, message: str, *, tool_name: str):
+        super().__init__(message, details={"tool": tool_name})
+
+
+class ToolExecutionError(ToolError):
+    """Error executing a tool action."""
+
+    code = "TOOL_EXECUTION_ERROR"
+
+    def __init__(self, message: str, *, tool_name: str, action: str | None = None):
+        details = {"tool": tool_name}
+        if action:
+            details["action"] = action
+        super().__init__(message, details=details)
+
+
+class EvaluationError(SandboxyError):
+    """Error during expression evaluation."""
+
+    code = "EVAL_ERROR"
+
+    def __init__(self, message: str, *, expression: str | None = None):
+        details = {}
+        if expression:
+            details["expression"] = expression
+        super().__init__(message, details=details)
+
+
+class AgentError(SandboxyError):
+    """Error related to agent operations."""
+
+    code = "AGENT_ERROR"
+
+
+class AgentNotFoundError(AgentError):
+    """Agent not found."""
+
+    code = "AGENT_NOT_FOUND"
+
+    def __init__(self, agent_id: str):
+        super().__init__(f"Agent not found: {agent_id}", details={"agent_id": agent_id})
+
+
+class ProviderError(SandboxyError):
+    """Error from an LLM provider."""
+
+    code = "PROVIDER_ERROR"
+
+    def __init__(self, message: str, *, provider: str, model: str | None = None):
+        details = {"provider": provider}
+        if model:
+            details["model"] = model
+        super().__init__(message, details=details)
+
+
+class ValidationError(SandboxyError):
+    """Input validation error."""
+
+    code = "VALIDATION_ERROR"
+
+    def __init__(self, message: str, *, field: str | None = None):
+        details = {}
+        if field:
+            details["field"] = field
+        super().__init__(message, details=details)
+
+
+class SessionError(SandboxyError):
+    """Error related to session operations."""
+
+    code = "SESSION_ERROR"
+
+
+class SessionNotFoundError(SessionError):
+    """Session not found."""
+
+    code = "SESSION_NOT_FOUND"
+
+    def __init__(self, session_id: str):
+        super().__init__(f"Session not found: {session_id}", details={"session_id": session_id})
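
Usage note (not part of the published wheel): a short sketch of how these error types serialize, using only the constructors and to_dict() defined above. The tool name and action values are made up for illustration.

from sandboxy.errors import SandboxyError, ToolExecutionError


def error_payload(err: SandboxyError) -> dict:
    # Every Sandboxy error carries a code plus a details dict, so an API layer
    # can branch on err.code or return err.to_dict() directly.
    return err.to_dict()


err = ToolExecutionError("backend timed out", tool_name="crm", action="lookup_user")
print(error_payload(err))
# {'error': 'TOOL_EXECUTION_ERROR', 'message': 'backend timed out',
#  'details': {'tool': 'crm', 'action': 'lookup_user'}}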