sandboxy 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. sandboxy/__init__.py +3 -0
  2. sandboxy/agents/__init__.py +21 -0
  3. sandboxy/agents/base.py +66 -0
  4. sandboxy/agents/llm_prompt.py +308 -0
  5. sandboxy/agents/loader.py +222 -0
  6. sandboxy/api/__init__.py +5 -0
  7. sandboxy/api/app.py +76 -0
  8. sandboxy/api/routes/__init__.py +1 -0
  9. sandboxy/api/routes/agents.py +92 -0
  10. sandboxy/api/routes/local.py +1388 -0
  11. sandboxy/api/routes/tools.py +106 -0
  12. sandboxy/cli/__init__.py +1 -0
  13. sandboxy/cli/main.py +1196 -0
  14. sandboxy/cli/type_detector.py +48 -0
  15. sandboxy/config.py +49 -0
  16. sandboxy/core/__init__.py +1 -0
  17. sandboxy/core/async_runner.py +824 -0
  18. sandboxy/core/mdl_parser.py +441 -0
  19. sandboxy/core/runner.py +599 -0
  20. sandboxy/core/safe_eval.py +165 -0
  21. sandboxy/core/state.py +234 -0
  22. sandboxy/datasets/__init__.py +20 -0
  23. sandboxy/datasets/loader.py +193 -0
  24. sandboxy/datasets/runner.py +442 -0
  25. sandboxy/errors.py +166 -0
  26. sandboxy/local/context.py +235 -0
  27. sandboxy/local/results.py +173 -0
  28. sandboxy/logging.py +31 -0
  29. sandboxy/mcp/__init__.py +25 -0
  30. sandboxy/mcp/client.py +360 -0
  31. sandboxy/mcp/wrapper.py +99 -0
  32. sandboxy/providers/__init__.py +34 -0
  33. sandboxy/providers/anthropic_provider.py +271 -0
  34. sandboxy/providers/base.py +123 -0
  35. sandboxy/providers/http_client.py +101 -0
  36. sandboxy/providers/openai_provider.py +282 -0
  37. sandboxy/providers/openrouter.py +958 -0
  38. sandboxy/providers/registry.py +199 -0
  39. sandboxy/scenarios/__init__.py +11 -0
  40. sandboxy/scenarios/comparison.py +491 -0
  41. sandboxy/scenarios/loader.py +262 -0
  42. sandboxy/scenarios/runner.py +468 -0
  43. sandboxy/scenarios/unified.py +1434 -0
  44. sandboxy/session/__init__.py +21 -0
  45. sandboxy/session/manager.py +278 -0
  46. sandboxy/tools/__init__.py +34 -0
  47. sandboxy/tools/base.py +127 -0
  48. sandboxy/tools/loader.py +270 -0
  49. sandboxy/tools/yaml_tools.py +708 -0
  50. sandboxy/ui/__init__.py +27 -0
  51. sandboxy/ui/dist/assets/index-CgAkYWrJ.css +1 -0
  52. sandboxy/ui/dist/assets/index-D4zoGFcr.js +347 -0
  53. sandboxy/ui/dist/index.html +14 -0
  54. sandboxy/utils/__init__.py +3 -0
  55. sandboxy/utils/time.py +20 -0
  56. sandboxy-0.0.1.dist-info/METADATA +241 -0
  57. sandboxy-0.0.1.dist-info/RECORD +60 -0
  58. sandboxy-0.0.1.dist-info/WHEEL +4 -0
  59. sandboxy-0.0.1.dist-info/entry_points.txt +3 -0
  60. sandboxy-0.0.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,21 @@
1
+ """Session management for interactive Sandboxy sessions.
2
+
3
+ This module provides session management for coordinating interactive sessions
4
+ between WebSocket connections and the AsyncRunner. It maintains in-memory
5
+ session state for local development and testing.
6
+
7
+ Typical usage:
8
+ from sandboxy.session import SessionManager, Session
9
+
10
+ manager = SessionManager()
11
+ session = manager.create_session(module, agent)
12
+ event_queue = await manager.start_session(session.id)
13
+ """
14
+
15
+ from sandboxy.session.manager import Session, SessionManager, session_manager
16
+
17
+ __all__ = [
18
+ "Session",
19
+ "SessionManager",
20
+ "session_manager",
21
+ ]
@@ -0,0 +1,278 @@
1
+ """Session Manager - coordinates interactive sessions between WebSocket and AsyncRunner."""
2
+
3
+ import asyncio
4
+ import logging
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+ from uuid import uuid4
8
+
9
+ from sandboxy.agents.base import Agent
10
+ from sandboxy.core.async_runner import AsyncRunner, RunEvent
11
+ from sandboxy.core.state import ModuleSpec, SessionState
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
@dataclass
class Session:
    """State bundle for one live interactive session.

    Groups the module being run, the agent driving it, the caller-supplied
    variables and the runner that executes the module, together with the
    bookkeeping used to hand events over to a consumer.
    """

    # Identity and configuration supplied when the session is created.
    id: str
    module: ModuleSpec
    agent: Agent
    variables: dict[str, Any]
    runner: AsyncRunner
    # Events recorded for this session, in the order they were appended.
    events: list[RunEvent] = field(default_factory=list)
    # Background task driving the runner; None until one has been assigned.
    _run_task: asyncio.Task | None = field(default=None)
    # Queue through which events are delivered to whoever consumes the session.
    _event_queue: asyncio.Queue[RunEvent] = field(default_factory=asyncio.Queue)

    @property
    def state(self) -> SessionState:
        """Return the session state currently held by the runner."""
        current = self.runner.session_state
        return current
33
+
34
+
35
class SessionManager:
    """Manages active interactive sessions.

    In-memory session store for local development and testing. Sessions are
    kept in a plain dict keyed by session ID; nothing is persisted and this
    class never expires sessions on its own.
    """

    def __init__(self) -> None:
        """Create a manager with an empty session table."""
        self._sessions: dict[str, Session] = {}

    @staticmethod
    def _cancel_run_task(session: Session) -> None:
        """Request cancellation of the session's run task if it is still active."""
        task = session._run_task
        if task is not None and not task.done():
            task.cancel()

    def create_session(
        self,
        module: ModuleSpec,
        agent: Agent,
        variables: dict[str, Any] | None = None,
    ) -> Session:
        """Create a new session and register it under a fresh UUID.

        Args:
            module: Module specification to run.
            agent: Agent to use for the session.
            variables: Optional variables for the module; stored on the
                session as an empty dict when omitted.

        Returns:
            The created Session object.

        """
        session_id = str(uuid4())
        runner = AsyncRunner(module, agent)

        session = Session(
            id=session_id,
            module=module,
            agent=agent,
            variables=variables or {},
            runner=runner,
        )

        self._sessions[session_id] = session
        logger.info("Created session %s for module %s", session_id, module.name)
        return session

    def get_session(self, session_id: str) -> Session | None:
        """Get a session by ID, or None if no such session is registered."""
        return self._sessions.get(session_id)

    def delete_session(self, session_id: str) -> bool:
        """Delete a session, cancelling its run task if one is still active.

        Returns:
            True if session was deleted, False if not found.

        """
        # Single lookup-and-remove instead of check / index / del.
        session = self._sessions.pop(session_id, None)
        if session is None:
            logger.debug("Attempted to delete non-existent session %s", session_id)
            return False

        self._cancel_run_task(session)
        logger.info("Deleted session %s", session_id)
        return True

    def list_sessions(self) -> list[Session]:
        """List all registered sessions (a new list; the store is not exposed)."""
        return list(self._sessions.values())

    async def start_session(self, session_id: str) -> asyncio.Queue[RunEvent]:
        """Start running a session in a background task.

        Calling this while the session's run task is still active does not
        start a second run: the existing event queue is returned instead, so
        the runner is never iterated twice concurrently and the handle to
        the active task is not lost.

        Args:
            session_id: ID of the session to start.

        Returns:
            Queue that will receive events as they occur.

        Raises:
            ValueError: If session not found.

        """
        session = self.get_session(session_id)
        if session is None:
            logger.warning("Attempted to start non-existent session %s", session_id)
            raise ValueError(f"Session not found: {session_id}")

        active = session._run_task
        if active is not None and not active.done():
            # Overwriting _run_task here would orphan the running task and
            # make it impossible to cancel from delete/mark-ended.
            logger.warning("Session %s is already running; reusing active run", session_id)
            return session._event_queue

        # Start the runner in a background task. The task is stored on the
        # session, which also keeps a strong reference to it.
        session._run_task = asyncio.create_task(self._run_session(session))
        logger.info("Started session %s", session_id)

        return session._event_queue

    async def _run_session(self, session: Session) -> None:
        """Run a session, recording each event and pushing it to its queue.

        Any non-cancellation failure is logged and converted into a final
        ``error`` event. Cancellation is logged and then propagated so the
        task ends in the cancelled state.
        """
        logger.debug("Running session %s", session.id)
        try:
            async for event in session.runner.run():
                session.events.append(event)
                await session._event_queue.put(event)
                logger.debug("Session %s emitted event: %s", session.id, event.type)

        except asyncio.CancelledError:
            logger.info("Session %s was cancelled", session.id)
            # Re-raise so asyncio marks the task as cancelled rather than
            # as having completed normally.
            raise
        except Exception as e:
            logger.exception("Session %s encountered error: %s", session.id, e)
            # Push error event
            error_event = RunEvent(type="error", payload={"message": str(e)})
            session.events.append(error_event)
            await session._event_queue.put(error_event)

    def provide_input(self, session_id: str, content: str) -> None:
        """Provide user input for a session by forwarding it to the runner.

        Args:
            session_id: ID of the session.
            content: User's input text.

        Raises:
            ValueError: If session not found.
            RuntimeError: If session is not awaiting input (raised by the
                runner, not by this method -- confirm against AsyncRunner).

        """
        session = self.get_session(session_id)
        if session is None:
            logger.warning("Attempted to provide input to non-existent session %s", session_id)
            raise ValueError(f"Session not found: {session_id}")

        logger.debug("Providing input to session %s", session_id)
        session.runner.provide_input(content)

    def inject_event(
        self,
        session_id: str,
        tool_name: str,
        event_type: str,
        args: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Inject a game event into a session.

        Delegates to the runner's ``inject_event`` for the named tool
        (e.g., "heatwave" in the lemonade stand) and returns whatever the
        runner returns.

        Args:
            session_id: ID of the session.
            tool_name: Name of the tool to call.
            event_type: Type of event to trigger.
            args: Optional additional arguments, passed through unchanged.

        Returns:
            The event result data from the tool.

        Raises:
            ValueError: If session not found. Failures inside the runner
                propagate as raised there.

        """
        session = self.get_session(session_id)
        if session is None:
            logger.warning("Attempted to inject event into non-existent session %s", session_id)
            raise ValueError(f"Session not found: {session_id}")

        logger.info(
            "Injecting event %s into session %s via tool %s", event_type, session_id, tool_name
        )
        return session.runner.inject_event(tool_name, event_type, args)

    def pause_session(self, session_id: str) -> bool:
        """Pause a session (not fully implemented yet).

        Currently only checks that the session exists; nothing is paused.

        Returns:
            True if the session exists, False otherwise.

        """
        session = self.get_session(session_id)
        if session is None:
            logger.debug("Attempted to pause non-existent session %s", session_id)
            return False
        # TODO: Implement proper pause mechanism
        logger.debug("Pause requested for session %s (not yet implemented)", session_id)
        return True

    def resume_session(self, session_id: str) -> bool:
        """Resume a paused session (not fully implemented yet).

        Currently only checks that the session exists; nothing is resumed.

        Returns:
            True if the session exists, False otherwise.

        """
        session = self.get_session(session_id)
        if session is None:
            logger.debug("Attempted to resume non-existent session %s", session_id)
            return False
        # TODO: Implement proper resume mechanism
        logger.debug("Resume requested for session %s (not yet implemented)", session_id)
        return True

    def mark_session_ended(self, session_id: str) -> bool:
        """Mark a session as ended (connection closed).

        Unlike delete_session, this preserves the session data for
        potential replay or export: the run task is cancelled but the
        session stays registered. This class does not expire ended
        sessions itself; they remain until delete_session is called.

        Args:
            session_id: ID of the session.

        Returns:
            True if session was marked, False if not found.

        """
        session = self.get_session(session_id)
        if session is None:
            logger.debug("Attempted to mark non-existent session %s as ended", session_id)
            return False

        self._cancel_run_task(session)

        logger.info("Marked session %s as ended", session_id)
        return True

    def get_session_events(self, session_id: str) -> list[RunEvent]:
        """Get all events for a session.

        Args:
            session_id: ID of the session.

        Returns:
            A copy of the session's event list, or empty list if session
            not found.

        """
        session = self.get_session(session_id)
        if session is None:
            return []
        return list(session.events)

    def get_session_state(self, session_id: str) -> dict[str, Any] | None:
        """Get the current environment state for a session.

        Args:
            session_id: ID of the session.

        Returns:
            The runner's ``env_state`` object as-is (not a copy), or None
            if session not found.

        """
        session = self.get_session(session_id)
        if session is None:
            return None
        return session.runner.env_state
275
+
276
+
277
+ # Global session manager instance
278
+ session_manager = SessionManager()
@@ -0,0 +1,34 @@
1
+ """Tools module - Tool interface, loader, and YAML tool implementations.
2
+
3
+ This module provides the core abstractions for defining and loading tools
4
+ that can be used by AI agents in sandbox scenarios.
5
+
6
+ Submodules:
7
+ sandboxy.tools.base:
8
+ - Tool: Protocol defining the tool interface
9
+ - BaseTool: Base class for tool implementations
10
+ - ToolConfig: Configuration model for tool instances
11
+ - ToolResult: Result model for tool invocations
12
+
13
+ sandboxy.tools.loader:
14
+ - ToolLoader: Loader for creating tool instances from config
15
+ - get_yaml_tool_libraries: List available YAML tool libraries
16
+ - load_tool_class: Load a tool class from module path
17
+ - load_yaml_tool_library: Load tools from a YAML library
18
+ - load_yaml_tools_from_scenario: Load tools from scenario data
19
+
20
+ sandboxy.tools.yaml_tools:
21
+ - YamlMockTool: YAML-defined mock tool implementation
22
+ - YamlToolLoader: Loader for YAML tool libraries
23
+ - ActionSpec: Specification for a tool action
24
+ - ParamSchema: Schema for action parameters
25
+ - SideEffect: State modification specification
26
+ - ToolSpec: Full tool specification
27
+ - ToolLibrary: Collection of tool specifications
28
+ - load_scenario_tools: Load tools from scenario data
29
+
30
+ Note:
31
+ Import directly from submodules to avoid circular dependencies:
32
+ from sandboxy.tools.base import BaseTool, ToolConfig, ToolResult
33
+ from sandboxy.tools.loader import ToolLoader
34
+ """
sandboxy/tools/base.py ADDED
@@ -0,0 +1,127 @@
1
+ """Base tool interface and models.
2
+
3
+ This module defines the core abstractions for tools:
4
+ - ToolConfig: Configuration for instantiating a tool
5
+ - ToolResult: Result of a tool invocation
6
+ - Tool: Protocol defining the tool interface
7
+ - BaseTool: Base implementation for custom tools
8
+ """
9
+
10
+ from typing import Any, Protocol
11
+
12
+ from pydantic import BaseModel, Field
13
+
14
+
15
class ToolConfig(BaseModel):
    """Declarative settings used to construct one tool instance.

    Attributes:
        name: Unique identifier for this tool instance.
        type: Tool type identifier (e.g., 'yaml_tool', 'mock_lemonade').
        description: Human-readable description; empty string when omitted.
        config: Tool-specific options; a fresh empty dict when omitted.
    """

    # Required identification fields.
    name: str
    type: str

    # Optional fields with defaults.
    description: str = ""
    config: dict[str, Any] = Field(default_factory=dict)
29
+
30
+
31
class ToolResult(BaseModel):
    """Outcome returned from invoking a tool action.

    Attributes:
        success: Whether the invocation succeeded.
        data: Result payload on success (type varies by action); None by default.
        error: Error message on failure; None by default.
    """

    # Required outcome flag.
    success: bool

    # Optional payload / failure description.
    data: Any = None
    error: str | None = None
43
+
44
+
45
class Tool(Protocol):
    """Structural interface that every tool implementation satisfies.

    A tool exposes named actions that an agent can invoke against a
    simulated environment, and can describe those actions on request.

    Attributes:
        name: Unique identifier for the tool.
        description: Human-readable description of the tool's purpose.
    """

    name: str
    description: str

    def invoke(self, action: str, args: dict[str, Any], env_state: dict[str, Any]) -> ToolResult:
        """Perform one action of this tool.

        Args:
            action: The action to perform (e.g., "get_order", "refund_order").
            args: Arguments for the action.
            env_state: Current environment state (can be modified by tools).

        Returns:
            Result of the tool invocation.
        """
        ...

    def get_actions(self) -> list[dict[str, Any]]:
        """Describe the actions this tool offers.

        Returns:
            List of action definitions with name, description, and parameters.
        """
        ...
80
+
81
+
82
class BaseTool:
    """Starting point for concrete tool implementations.

    Subclasses override `invoke` to handle their actions and `get_actions`
    to advertise them. The defaults here reject every action and advertise
    none.

    Attributes:
        name: Tool instance name from config.
        description: Tool description from config.
        config: Tool-specific configuration dict.
    """

    def __init__(self, config: ToolConfig) -> None:
        """Copy name, description and options from the given configuration.

        Args:
            config: Tool configuration containing name, description, and options.
        """
        self.name, self.description, self.config = (
            config.name,
            config.description,
            config.config,
        )

    def invoke(self, action: str, args: dict[str, Any], env_state: dict[str, Any]) -> ToolResult:
        """Reject the action; subclasses override this to handle actions.

        Args:
            action: The action to perform.
            args: Arguments for the action.
            env_state: Current environment state (can be modified).

        Returns:
            A failed ToolResult whose error names the unknown action.
        """
        message = f"Unknown action: {action}"
        return ToolResult(success=False, error=message)

    def get_actions(self) -> list[dict[str, Any]]:
        """Advertise no actions; subclasses override this to list theirs.

        Returns:
            An empty list of action definitions.
        """
        no_actions: list[dict[str, Any]] = []
        return no_actions