mcp-python-exec-sandbox 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_python_exec_sandbox-0.1.2/.github/workflows/ci.yml +25 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/CLAUDE.md +1 -1
- mcp_python_exec_sandbox-0.1.2/PKG-INFO +307 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/e2e_tests/test_data_science.py +8 -4
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/e2e_tests/test_docker_sandbox.py +8 -3
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/e2e_tests/test_mcp_protocol.py +86 -56
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/e2e_tests/test_package_install.py +2 -2
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/e2e_tests/test_real_execution.py +0 -1
- mcp_python_exec_sandbox-0.1.2/pyproject.toml +49 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/__main__.py +0 -1
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/cache.py +9 -7
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/config.py +2 -4
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/executor.py +2 -5
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/output.py +1 -1
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/sandbox.py +2 -6
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/sandbox_docker.py +15 -7
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/sandbox_linux.py +33 -12
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/sandbox_macos.py +4 -5
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/script.py +1 -3
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/server.py +53 -53
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/tests/test_config.py +16 -9
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/tests/test_executor.py +31 -20
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/tests/test_integration.py +1 -5
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/tests/test_sandbox.py +1 -1
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/tests/test_script.py +13 -13
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/tests/test_server.py +0 -2
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/uv.lock +28 -1
- mcp_python_exec_sandbox-0.1.0/PKG-INFO +0 -9
- mcp_python_exec_sandbox-0.1.0/pyproject.toml +0 -24
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/.devcontainer/Dockerfile +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/.devcontainer/devcontainer.json +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/.gitignore +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/LICENSE +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/README.md +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/e2e_tests/__init__.py +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/e2e_tests/test_sandbox_enforcement.py +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/profiles/Dockerfile +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/profiles/sandbox_macos.sb +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/profiles/warmup_packages.txt +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/__init__.py +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/src/mcp_python_exec_sandbox/errors.py +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/tests/__init__.py +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/tests/conftest.py +0 -0
- {mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/tests/test_output.py +0 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- uses: astral-sh/setup-uv@v4
|
|
15
|
+
- run: uv sync --dev
|
|
16
|
+
- run: uv run ruff check .
|
|
17
|
+
- run: uv run ruff format --check .
|
|
18
|
+
|
|
19
|
+
test:
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
steps:
|
|
22
|
+
- uses: actions/checkout@v4
|
|
23
|
+
- uses: astral-sh/setup-uv@v4
|
|
24
|
+
- run: uv sync --dev
|
|
25
|
+
- run: uv run pytest tests/ -v
|
|
@@ -37,7 +37,7 @@ uv run pytest e2e_tests/ -v # E2E tests (slow, needs network)
|
|
|
37
37
|
|
|
38
38
|
- Run `uv run pytest tests/ -v` before committing. All tests must pass.
|
|
39
39
|
- Keep dependencies minimal. Do not add runtime deps without strong justification.
|
|
40
|
-
-
|
|
40
|
+
- Lint with `uv run ruff check .` and verify formatting with `uv run ruff format --check .` before committing. Fix issues with `uv run ruff check --fix .` and `uv run ruff format .`.
|
|
41
41
|
- Tool docstrings in `server.py` are user-facing — they become the MCP tool descriptions that agents see. Write them for an LLM audience: include examples, avoid unexplained jargon, link PEPs.
|
|
42
42
|
- Always pin versions in examples (e.g. `"pandas>=2.2"` not `"pandas"`).
|
|
43
43
|
- Sandbox backends must degrade gracefully: if the tool (bwrap, sandbox-exec, docker) is missing, fall back to `NoopSandbox` with a warning.
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mcp-python-exec-sandbox
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: MCP server for secure Python script execution with automatic dependency management
|
|
5
|
+
Project-URL: Homepage, https://github.com/lu-zhengda/mcp-python-exec-sandbox
|
|
6
|
+
Project-URL: Repository, https://github.com/lu-zhengda/mcp-python-exec-sandbox
|
|
7
|
+
Project-URL: Issues, https://github.com/lu-zhengda/mcp-python-exec-sandbox/issues
|
|
8
|
+
Author: Zhengda Lu
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agent,ai,execution,mcp,python,sandbox
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Security
|
|
18
|
+
Classifier: Topic :: Software Development :: Interpreters
|
|
19
|
+
Requires-Python: >=3.13
|
|
20
|
+
Requires-Dist: fastmcp<3,>=2.0
|
|
21
|
+
Requires-Dist: tomli-w>=1.0
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# mcp-python-exec-sandbox
|
|
25
|
+
|
|
26
|
+
Sandboxed Python execution for AI agents. Scripts run in ephemeral, isolated environments with inline dependencies ([PEP 723](https://peps.python.org/pep-0723/)) -- **zero host pollution, zero leftover venvs, zero package conflicts**.
|
|
27
|
+
|
|
28
|
+
## Why?
|
|
29
|
+
|
|
30
|
+
Every coding agent can already run Python on your host. The problem is what happens next: packages accumulate, venvs sprawl, and a rogue `pip install` breaks your system. **mcp-python-exec-sandbox** eliminates this:
|
|
31
|
+
|
|
32
|
+
- Scripts execute in a sandbox (bubblewrap on Linux, sandbox-exec on macOS, Docker everywhere)
|
|
33
|
+
- Dependencies are declared inline and resolved ephemerally via `uv`
|
|
34
|
+
- Nothing touches your host's Python, site-packages, or virtualenvs
|
|
35
|
+
- Each execution is isolated and disposable
|
|
36
|
+
|
|
37
|
+
## Features
|
|
38
|
+
|
|
39
|
+
- **Sandboxed execution** -- platform-specific isolation prevents host filesystem access
|
|
40
|
+
- **PEP 723 inline metadata** -- declare dependencies directly in scripts with `# /// script` blocks
|
|
41
|
+
- **Multi-version Python** -- run scripts on Python 3.13, 3.14, or 3.15 (uv downloads the right version automatically)
|
|
42
|
+
- **Ephemeral environments** -- dependencies are resolved per-execution, never persisted
|
|
43
|
+
- **Package caching** -- uv's global cache makes repeat installs near-instant
|
|
44
|
+
- **Timeout enforcement** -- configurable per-execution timeouts
|
|
45
|
+
- **Output truncation** -- prevents runaway output from overwhelming the agent
|
|
46
|
+
|
|
47
|
+
## Prerequisites
|
|
48
|
+
|
|
49
|
+
All setups require:
|
|
50
|
+
|
|
51
|
+
- **Python 3.13+** -- to run the MCP server process
|
|
52
|
+
- **[uv](https://docs.astral.sh/uv/getting-started/installation/)** -- manages script execution, dependency resolution, and Python version downloads. Also provides `uvx` for running the server without installing it globally.
|
|
53
|
+
|
|
54
|
+
Additional requirements depend on your chosen sandbox backend:
|
|
55
|
+
|
|
56
|
+
| Setup | Additional requirements | Install |
|
|
57
|
+
|-------|------------------------|---------|
|
|
58
|
+
| **Native sandbox (Linux)** | [bubblewrap](https://github.com/containers/bubblewrap) | `sudo apt install bubblewrap` |
|
|
59
|
+
| **Native sandbox (macOS)** | None -- `sandbox-exec` is built into macOS | -- |
|
|
60
|
+
| **Docker sandbox** | [Docker Engine](https://docs.docker.com/engine/install/) | See Docker docs |
|
|
61
|
+
| **No sandbox** | None | -- |
|
|
62
|
+
|
|
63
|
+
> **Host Python vs. execution Python:** These are independent. Python 3.13+ is needed to run the server process itself. The `--python-version` flag controls which Python version your *scripts* execute on -- uv downloads the target version automatically. You do not need to install Python 3.14 or 3.15 on your host to run scripts on those versions.
|
|
64
|
+
|
|
65
|
+
## Quick start
|
|
66
|
+
|
|
67
|
+
### Claude Code (native sandbox -- recommended)
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
claude mcp add python-sandbox -- uvx mcp-python-exec-sandbox --sandbox-backend native
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Claude Code (Docker sandbox)
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
docker build -t mcp-python-exec-sandbox profiles/
|
|
77
|
+
claude mcp add python-sandbox -- uvx mcp-python-exec-sandbox --sandbox-backend docker
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
> The Docker image build requires the repo source. Clone it first: `git clone https://github.com/lu-zhengda/mcp-python-exec-sandbox.git`
|
|
81
|
+
|
|
82
|
+
### Claude Code (no sandbox)
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
claude mcp add python-sandbox -- uvx mcp-python-exec-sandbox --sandbox-backend none
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Manual JSON config
|
|
89
|
+
|
|
90
|
+
```json
|
|
91
|
+
{
|
|
92
|
+
"mcpServers": {
|
|
93
|
+
"python-sandbox": {
|
|
94
|
+
"command": "uvx",
|
|
95
|
+
"args": ["mcp-python-exec-sandbox", "--sandbox-backend", "native"]
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Multi-version Python
|
|
102
|
+
|
|
103
|
+
Use `--python-version` to target a specific Python version. uv downloads it automatically -- no manual install needed.
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
# Python 3.13 (default)
|
|
107
|
+
uvx mcp-python-exec-sandbox --python-version 3.13
|
|
108
|
+
|
|
109
|
+
# Python 3.14
|
|
110
|
+
uvx mcp-python-exec-sandbox --python-version 3.14
|
|
111
|
+
|
|
112
|
+
# Python 3.15
|
|
113
|
+
uvx mcp-python-exec-sandbox --python-version 3.15
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
This works across all sandbox backends. The Docker sandbox uses uv inside the container to manage Python versions, so the same `--python-version` flag applies.
|
|
117
|
+
|
|
118
|
+
## Tools
|
|
119
|
+
|
|
120
|
+
### `execute_python`
|
|
121
|
+
|
|
122
|
+
Execute a Python script with automatic dependency management.
|
|
123
|
+
|
|
124
|
+
| Parameter | Type | Default | Description |
|
|
125
|
+
|-----------|------|---------|-------------|
|
|
126
|
+
| `script` | str | required | Python source code, may include PEP 723 inline metadata |
|
|
127
|
+
| `dependencies` | list[str] | `[]` | Extra PEP 508 dependency specifiers to merge |
|
|
128
|
+
| `timeout_seconds` | int | 30 | Maximum execution time (1--300) |
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
# Simple script
|
|
132
|
+
execute_python(script="print('hello world')")
|
|
133
|
+
|
|
134
|
+
# Script with dependencies
|
|
135
|
+
execute_python(
|
|
136
|
+
script="import requests; print(requests.get('https://httpbin.org/get').status_code)",
|
|
137
|
+
dependencies=["requests>=2.32"]
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
# Script with inline PEP 723 metadata
|
|
141
|
+
execute_python(script="""
|
|
142
|
+
# /// script
|
|
143
|
+
# dependencies = ["pandas>=2.2", "matplotlib>=3.9"]
|
|
144
|
+
# ///
|
|
145
|
+
|
|
146
|
+
import pandas as pd
|
|
147
|
+
print(pd.DataFrame({'a': [1,2,3]}).describe())
|
|
148
|
+
""")
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### `check_environment`
|
|
152
|
+
|
|
153
|
+
Returns information about the execution environment: Python version, uv version, platform, sandbox status, and configuration.
|
|
154
|
+
|
|
155
|
+
### `validate_script`
|
|
156
|
+
|
|
157
|
+
Validates a script's PEP 723 metadata and dependencies without executing it.
|
|
158
|
+
|
|
159
|
+
| Parameter | Type | Default | Description |
|
|
160
|
+
|-----------|------|---------|-------------|
|
|
161
|
+
| `script` | str | required | Python source code to validate |
|
|
162
|
+
| `dependencies` | list[str] | `[]` | Extra dependency specifiers to validate |
|
|
163
|
+
|
|
164
|
+
## Sandbox backends
|
|
165
|
+
|
|
166
|
+
| Backend | Platform | Tool | Notes |
|
|
167
|
+
|---------|----------|------|-------|
|
|
168
|
+
| `native` | Linux | bubblewrap | Namespace isolation, network allowed |
|
|
169
|
+
| `native` | macOS | sandbox-exec | Seatbelt profiles, network allowed |
|
|
170
|
+
| `docker` | Any | Docker | Container isolation, resource limits |
|
|
171
|
+
| `none` | Any | -- | No sandboxing (not recommended) |
|
|
172
|
+
|
|
173
|
+
If the requested sandbox tool is unavailable, the server falls back to `none` with a warning.
|
|
174
|
+
|
|
175
|
+
### Docker sandbox setup
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
docker build -t mcp-python-exec-sandbox profiles/
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## CLI options
|
|
182
|
+
|
|
183
|
+
```
|
|
184
|
+
mcp-python-exec-sandbox [OPTIONS]
|
|
185
|
+
|
|
186
|
+
Options:
|
|
187
|
+
--python-version TEXT Python version for execution (default: 3.13)
|
|
188
|
+
--sandbox-backend TEXT native | docker | none (default: native)
|
|
189
|
+
--max-timeout INT Maximum allowed timeout in seconds (default: 300)
|
|
190
|
+
--default-timeout INT Default timeout in seconds (default: 30)
|
|
191
|
+
--max-output-bytes INT Maximum output size in bytes (default: 102400)
|
|
192
|
+
--no-warm-cache Skip cache warming on startup
|
|
193
|
+
--uv-path TEXT Path to uv binary (default: uv)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
## Development
|
|
197
|
+
|
|
198
|
+
### Setup
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
git clone https://github.com/lu-zhengda/mcp-python-exec-sandbox.git
|
|
202
|
+
cd mcp-python-exec-sandbox
|
|
203
|
+
uv sync --dev
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### Project structure
|
|
207
|
+
|
|
208
|
+
```
|
|
209
|
+
src/mcp_python_exec_sandbox/ # Package source
|
|
210
|
+
server.py # FastMCP server + tool definitions
|
|
211
|
+
executor.py # uv subprocess orchestration
|
|
212
|
+
script.py # PEP 723 metadata parsing/merging
|
|
213
|
+
sandbox.py # Sandbox ABC + factory
|
|
214
|
+
sandbox_{linux,macos,docker}.py
|
|
215
|
+
config.py, cache.py, output.py, errors.py
|
|
216
|
+
tests/ # Unit + integration tests (mocked or local uv)
|
|
217
|
+
e2e_tests/ # End-to-end tests (require uv + network)
|
|
218
|
+
profiles/ # Dockerfile, macOS seatbelt profile, warmup packages
|
|
219
|
+
.devcontainer/ # Devcontainer for Linux sandbox testing from macOS
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### Running tests
|
|
223
|
+
|
|
224
|
+
**Unit and integration tests** -- fast, run everywhere:
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
uv run pytest tests/ -v
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
**E2E tests** -- require `uv` and network access. These exercise real script execution, package installation, MCP protocol flow, and sandbox enforcement:
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
uv run pytest e2e_tests/ -v
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
### Docker sandbox tests
|
|
237
|
+
|
|
238
|
+
The Docker E2E tests (`e2e_tests/test_docker_sandbox.py`) verify execution, dependency installation, read-only filesystem enforcement, host isolation, and timeout handling through the Docker backend.
|
|
239
|
+
|
|
240
|
+
Prerequisites:
|
|
241
|
+
|
|
242
|
+
1. Docker must be installed and running
|
|
243
|
+
2. Build the sandbox image:
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
docker build -t mcp-python-exec-sandbox profiles/
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
Then run:
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
uv run pytest e2e_tests/test_docker_sandbox.py -v
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
These tests are automatically skipped if Docker is unavailable or the image hasn't been built.
|
|
256
|
+
|
|
257
|
+
### Linux sandbox tests (devcontainer)
|
|
258
|
+
|
|
259
|
+
The Linux sandbox tests (`e2e_tests/test_sandbox_enforcement.py::test_linux_sandbox_blocks_etc_shadow`) use bubblewrap (`bwrap`) for namespace isolation. They are skipped on macOS because `bwrap` is Linux-only.
|
|
260
|
+
|
|
261
|
+
To run them from macOS, use the included devcontainer which provides Ubuntu 24.04 with `bwrap` pre-installed:
|
|
262
|
+
|
|
263
|
+
**VS Code:**
|
|
264
|
+
|
|
265
|
+
1. Install the [Dev Containers](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) extension
|
|
266
|
+
2. Open the project and select **Reopen in Container**
|
|
267
|
+
3. In the integrated terminal:
|
|
268
|
+
|
|
269
|
+
```bash
|
|
270
|
+
uv run pytest e2e_tests/test_sandbox_enforcement.py -v
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
**CLI:**
|
|
274
|
+
|
|
275
|
+
```bash
|
|
276
|
+
# Install the devcontainer CLI (once)
|
|
277
|
+
npm install -g @devcontainers/cli
|
|
278
|
+
|
|
279
|
+
# Build and start the container
|
|
280
|
+
devcontainer up --workspace-folder .
|
|
281
|
+
|
|
282
|
+
# Run the Linux sandbox tests inside the container
|
|
283
|
+
devcontainer exec --workspace-folder . uv run pytest e2e_tests/test_sandbox_enforcement.py -v
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Test matrix
|
|
287
|
+
|
|
288
|
+
| Test suite | Command | Requirements |
|
|
289
|
+
|------------|---------|-------------|
|
|
290
|
+
| Unit tests | `uv run pytest tests/ -v` | `uv` |
|
|
291
|
+
| Integration tests | `uv run pytest tests/test_integration.py -v` | `uv` |
|
|
292
|
+
| E2E (general) | `uv run pytest e2e_tests/ -v` | `uv`, network |
|
|
293
|
+
| E2E (Docker sandbox) | `uv run pytest e2e_tests/test_docker_sandbox.py -v` | `uv`, Docker, sandbox image |
|
|
294
|
+
| E2E (Linux/bwrap sandbox) | `uv run pytest e2e_tests/test_sandbox_enforcement.py -v` | `uv`, Linux with `bwrap` (or devcontainer) |
|
|
295
|
+
|
|
296
|
+
### Contributing
|
|
297
|
+
|
|
298
|
+
- One logical change per commit. Descriptive commit message (imperative mood).
|
|
299
|
+
- Run `uv run pytest tests/ -v` before committing -- all tests must pass.
|
|
300
|
+
- Add tests for new functionality: unit tests in `tests/`, E2E in `e2e_tests/` if it needs real execution.
|
|
301
|
+
- Keep dependencies minimal. Do not add runtime deps without strong justification.
|
|
302
|
+
- Tool docstrings in `server.py` are user-facing MCP tool descriptions. Write them for an LLM audience.
|
|
303
|
+
- Sandbox backends must degrade gracefully: if the tool is missing, fall back to `NoopSandbox` with a warning.
|
|
304
|
+
|
|
305
|
+
## License
|
|
306
|
+
|
|
307
|
+
MIT
|
{mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/e2e_tests/test_data_science.py
RENAMED
|
@@ -94,7 +94,9 @@ print("=== Top Products ===")
|
|
|
94
94
|
for prod, rev in top.items():
|
|
95
95
|
print(f" {prod}: {rev:.2f}")
|
|
96
96
|
|
|
97
|
-
pivot = df.pivot_table(
|
|
97
|
+
pivot = df.pivot_table(
|
|
98
|
+
values="revenue", index="product", columns="region", aggfunc="mean"
|
|
99
|
+
).round(2)
|
|
98
100
|
print("\\n=== Pivot ===")
|
|
99
101
|
print(pivot.to_string())
|
|
100
102
|
""",
|
|
@@ -313,7 +315,9 @@ dist = np.random.uniform(1, 30, n)
|
|
|
313
315
|
|
|
314
316
|
price = 150*sqft + 20000*bedrooms - 1000*age - 2000*dist + 50000 + np.random.normal(0, 20000, n)
|
|
315
317
|
|
|
316
|
-
df = pd.DataFrame({
|
|
318
|
+
df = pd.DataFrame({
|
|
319
|
+
"sqft": sqft, "bedrooms": bedrooms, "age": age, "distance": dist, "price": price
|
|
320
|
+
})
|
|
317
321
|
features = ["sqft", "bedrooms", "age", "distance"]
|
|
318
322
|
X, y = df[features].values, df["price"].values
|
|
319
323
|
|
|
@@ -527,7 +531,7 @@ class TestPEP723InlineMetadata:
|
|
|
527
531
|
@pytest.mark.asyncio
|
|
528
532
|
async def test_httpx_pydantic_inline(self):
|
|
529
533
|
"""Script with inline PEP 723 block declaring httpx + pydantic."""
|
|
530
|
-
script =
|
|
534
|
+
script = """\
|
|
531
535
|
# /// script
|
|
532
536
|
# dependencies = ["httpx", "pydantic>=2.0"]
|
|
533
537
|
# requires-python = ">=3.11"
|
|
@@ -543,7 +547,7 @@ resp = httpx.get("https://httpbin.org/ip")
|
|
|
543
547
|
info = IPInfo.model_validate(resp.json())
|
|
544
548
|
print(f"status: {resp.status_code}")
|
|
545
549
|
print(f"ip: {info.origin}")
|
|
546
|
-
|
|
550
|
+
"""
|
|
547
551
|
with tempfile.TemporaryDirectory(prefix="mcp-e2e-") as tmpdir:
|
|
548
552
|
path = Path(tmpdir) / "script.py"
|
|
549
553
|
path.write_text(script, encoding="utf-8")
|
{mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/e2e_tests/test_docker_sandbox.py
RENAMED
|
@@ -17,7 +17,9 @@ def _docker_available() -> bool:
|
|
|
17
17
|
return False
|
|
18
18
|
try:
|
|
19
19
|
result = subprocess.run(
|
|
20
|
-
[docker, "info"],
|
|
20
|
+
[docker, "info"],
|
|
21
|
+
capture_output=True,
|
|
22
|
+
timeout=5,
|
|
21
23
|
)
|
|
22
24
|
return result.returncode == 0
|
|
23
25
|
except (subprocess.TimeoutExpired, OSError):
|
|
@@ -31,7 +33,8 @@ def _docker_image_exists(name: str = "mcp-python-exec-sandbox") -> bool:
|
|
|
31
33
|
try:
|
|
32
34
|
result = subprocess.run(
|
|
33
35
|
[docker, "image", "inspect", name],
|
|
34
|
-
capture_output=True,
|
|
36
|
+
capture_output=True,
|
|
37
|
+
timeout=10,
|
|
35
38
|
)
|
|
36
39
|
return result.returncode == 0
|
|
37
40
|
except (subprocess.TimeoutExpired, OSError):
|
|
@@ -49,7 +52,8 @@ pytestmark = [
|
|
|
49
52
|
),
|
|
50
53
|
pytest.mark.skipif(
|
|
51
54
|
not _docker_image_exists(),
|
|
52
|
-
reason="mcp-python-exec-sandbox image not built
|
|
55
|
+
reason="mcp-python-exec-sandbox image not built "
|
|
56
|
+
"(run: docker build -t mcp-python-exec-sandbox profiles/)",
|
|
53
57
|
),
|
|
54
58
|
]
|
|
55
59
|
|
|
@@ -186,6 +190,7 @@ print(f"ETC_HOSTNAME={etc_hostname}")
|
|
|
186
190
|
|
|
187
191
|
# The container gets its own hostname (Docker assigns a short hex id)
|
|
188
192
|
import socket
|
|
193
|
+
|
|
189
194
|
host_hostname = socket.gethostname()
|
|
190
195
|
# Extract container hostname from output
|
|
191
196
|
for line in result.stdout.splitlines():
|
{mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/e2e_tests/test_mcp_protocol.py
RENAMED
|
@@ -13,8 +13,11 @@ pytestmark = pytest.mark.skipif(
|
|
|
13
13
|
)
|
|
14
14
|
|
|
15
15
|
_SERVER_CMD = [
|
|
16
|
-
sys.executable,
|
|
17
|
-
"
|
|
16
|
+
sys.executable,
|
|
17
|
+
"-m",
|
|
18
|
+
"mcp_python_exec_sandbox",
|
|
19
|
+
"--sandbox-backend",
|
|
20
|
+
"none",
|
|
18
21
|
"--no-warm-cache",
|
|
19
22
|
]
|
|
20
23
|
|
|
@@ -54,11 +57,14 @@ class MCPClient:
|
|
|
54
57
|
return json.loads(line)
|
|
55
58
|
|
|
56
59
|
def initialize(self):
|
|
57
|
-
result = self.send(
|
|
58
|
-
"
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
60
|
+
result = self.send(
|
|
61
|
+
"initialize",
|
|
62
|
+
{
|
|
63
|
+
"protocolVersion": "2024-11-05",
|
|
64
|
+
"capabilities": {},
|
|
65
|
+
"clientInfo": {"name": "test", "version": "1.0"},
|
|
66
|
+
},
|
|
67
|
+
)
|
|
62
68
|
self.send("notifications/initialized", {}, notify=True)
|
|
63
69
|
return result
|
|
64
70
|
|
|
@@ -125,35 +131,44 @@ class TestMCPProtocol:
|
|
|
125
131
|
|
|
126
132
|
def test_execute_simple_script(self, mcp):
|
|
127
133
|
"""Test executing a simple print script."""
|
|
128
|
-
result = mcp.call_tool(
|
|
129
|
-
"
|
|
130
|
-
|
|
134
|
+
result = mcp.call_tool(
|
|
135
|
+
"execute_python",
|
|
136
|
+
{
|
|
137
|
+
"script": "print('hello from mcp')",
|
|
138
|
+
},
|
|
139
|
+
)
|
|
131
140
|
text = result["result"]["content"][0]["text"]
|
|
132
141
|
assert "hello from mcp" in text
|
|
133
142
|
assert "exit_code: 0" in text
|
|
134
143
|
|
|
135
144
|
def test_execute_with_deps(self, mcp):
|
|
136
145
|
"""Test executing a script with dependencies."""
|
|
137
|
-
result = mcp.call_tool(
|
|
138
|
-
"
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
146
|
+
result = mcp.call_tool(
|
|
147
|
+
"execute_python",
|
|
148
|
+
{
|
|
149
|
+
"script": "import pydantic; print(f'v{pydantic.__version__}')",
|
|
150
|
+
"dependencies": ["pydantic>=2.0"],
|
|
151
|
+
"timeout_seconds": 120,
|
|
152
|
+
},
|
|
153
|
+
)
|
|
142
154
|
text = result["result"]["content"][0]["text"]
|
|
143
155
|
assert "exit_code: 0" in text
|
|
144
156
|
assert "v2." in text
|
|
145
157
|
|
|
146
158
|
def test_execute_pandas(self, mcp):
|
|
147
159
|
"""Test executing a pandas script via MCP."""
|
|
148
|
-
result = mcp.call_tool(
|
|
149
|
-
"
|
|
150
|
-
|
|
151
|
-
"
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
160
|
+
result = mcp.call_tool(
|
|
161
|
+
"execute_python",
|
|
162
|
+
{
|
|
163
|
+
"script": (
|
|
164
|
+
"import pandas as pd; "
|
|
165
|
+
"df = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6]}); "
|
|
166
|
+
"print(df.sum().to_dict())"
|
|
167
|
+
),
|
|
168
|
+
"dependencies": ["pandas"],
|
|
169
|
+
"timeout_seconds": 120,
|
|
170
|
+
},
|
|
171
|
+
)
|
|
157
172
|
text = result["result"]["content"][0]["text"]
|
|
158
173
|
assert "exit_code: 0" in text
|
|
159
174
|
assert "'a': 6" in text
|
|
@@ -161,16 +176,19 @@ class TestMCPProtocol:
|
|
|
161
176
|
|
|
162
177
|
def test_execute_numpy_scipy(self, mcp):
|
|
163
178
|
"""Test numpy + scipy through MCP."""
|
|
164
|
-
result = mcp.call_tool(
|
|
165
|
-
"
|
|
166
|
-
|
|
167
|
-
"
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
179
|
+
result = mcp.call_tool(
|
|
180
|
+
"execute_python",
|
|
181
|
+
{
|
|
182
|
+
"script": (
|
|
183
|
+
"import numpy as np; from scipy import stats; "
|
|
184
|
+
"np.random.seed(42); "
|
|
185
|
+
"r, p = stats.pearsonr(np.random.randn(100), np.random.randn(100)); "
|
|
186
|
+
"print(f'r={r:.4f}')"
|
|
187
|
+
),
|
|
188
|
+
"dependencies": ["numpy", "scipy"],
|
|
189
|
+
"timeout_seconds": 120,
|
|
190
|
+
},
|
|
191
|
+
)
|
|
174
192
|
text = result["result"]["content"][0]["text"]
|
|
175
193
|
assert "exit_code: 0" in text
|
|
176
194
|
assert "r=" in text
|
|
@@ -178,11 +196,11 @@ class TestMCPProtocol:
|
|
|
178
196
|
def test_execute_with_pep723_inline(self, mcp):
|
|
179
197
|
"""Test a script with inline PEP 723 metadata block."""
|
|
180
198
|
script = (
|
|
181
|
-
|
|
199
|
+
"# /// script\n"
|
|
182
200
|
'# dependencies = ["rich"]\n'
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
201
|
+
"# ///\n"
|
|
202
|
+
"\n"
|
|
203
|
+
"from rich.text import Text\n"
|
|
186
204
|
'print(Text("hello").plain)\n'
|
|
187
205
|
)
|
|
188
206
|
result = mcp.call_tool("execute_python", {"script": script, "timeout_seconds": 120})
|
|
@@ -192,28 +210,37 @@ class TestMCPProtocol:
|
|
|
192
210
|
|
|
193
211
|
def test_execute_timeout(self, mcp):
|
|
194
212
|
"""Test that timeout is enforced."""
|
|
195
|
-
result = mcp.call_tool(
|
|
196
|
-
"
|
|
197
|
-
|
|
198
|
-
|
|
213
|
+
result = mcp.call_tool(
|
|
214
|
+
"execute_python",
|
|
215
|
+
{
|
|
216
|
+
"script": "import time; time.sleep(60)",
|
|
217
|
+
"timeout_seconds": 2,
|
|
218
|
+
},
|
|
219
|
+
)
|
|
199
220
|
text = result["result"]["content"][0]["text"]
|
|
200
221
|
assert "timed_out: true" in text
|
|
201
222
|
|
|
202
223
|
def test_execute_nonzero_exit(self, mcp):
|
|
203
224
|
"""Test script that exits with non-zero code."""
|
|
204
|
-
result = mcp.call_tool(
|
|
205
|
-
"
|
|
206
|
-
|
|
225
|
+
result = mcp.call_tool(
|
|
226
|
+
"execute_python",
|
|
227
|
+
{
|
|
228
|
+
"script": "import sys; print('bye'); sys.exit(1)",
|
|
229
|
+
},
|
|
230
|
+
)
|
|
207
231
|
text = result["result"]["content"][0]["text"]
|
|
208
232
|
assert "exit_code: 1" in text
|
|
209
233
|
assert "bye" in text
|
|
210
234
|
|
|
211
235
|
def test_validate_script_valid(self, mcp):
|
|
212
236
|
"""Test validate_script with valid deps."""
|
|
213
|
-
result = mcp.call_tool(
|
|
214
|
-
"
|
|
215
|
-
|
|
216
|
-
|
|
237
|
+
result = mcp.call_tool(
|
|
238
|
+
"validate_script",
|
|
239
|
+
{
|
|
240
|
+
"script": "import pandas",
|
|
241
|
+
"dependencies": ["pandas>=2.0", "numpy"],
|
|
242
|
+
},
|
|
243
|
+
)
|
|
217
244
|
text = result["result"]["content"][0]["text"]
|
|
218
245
|
assert "VALID" in text
|
|
219
246
|
assert "pandas>=2.0" in text
|
|
@@ -221,9 +248,12 @@ class TestMCPProtocol:
|
|
|
221
248
|
|
|
222
249
|
def test_validate_script_no_deps(self, mcp):
|
|
223
250
|
"""Test validate_script with a bare script."""
|
|
224
|
-
result = mcp.call_tool(
|
|
225
|
-
"
|
|
226
|
-
|
|
251
|
+
result = mcp.call_tool(
|
|
252
|
+
"validate_script",
|
|
253
|
+
{
|
|
254
|
+
"script": "print('hello')",
|
|
255
|
+
},
|
|
256
|
+
)
|
|
227
257
|
text = result["result"]["content"][0]["text"]
|
|
228
258
|
assert "VALID" in text
|
|
229
259
|
assert "dependencies: none" in text
|
|
@@ -231,12 +261,12 @@ class TestMCPProtocol:
|
|
|
231
261
|
def test_validate_script_inline_metadata(self, mcp):
|
|
232
262
|
"""Test validate_script with inline PEP 723 metadata."""
|
|
233
263
|
script = (
|
|
234
|
-
|
|
264
|
+
"# /// script\n"
|
|
235
265
|
'# dependencies = ["requests"]\n'
|
|
236
266
|
'# requires-python = ">=3.11"\n'
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
267
|
+
"# ///\n"
|
|
268
|
+
"\n"
|
|
269
|
+
"import requests\n"
|
|
240
270
|
)
|
|
241
271
|
result = mcp.call_tool("validate_script", {"script": script})
|
|
242
272
|
text = result["result"]["content"][0]["text"]
|
{mcp_python_exec_sandbox-0.1.0 → mcp_python_exec_sandbox-0.1.2}/e2e_tests/test_package_install.py
RENAMED
|
@@ -47,7 +47,7 @@ print(resp.status_code)
|
|
|
47
47
|
@pytest.mark.asyncio
|
|
48
48
|
async def test_inline_metadata_in_script():
|
|
49
49
|
"""Test that scripts with inline PEP 723 metadata work."""
|
|
50
|
-
script =
|
|
50
|
+
script = """\
|
|
51
51
|
# /// script
|
|
52
52
|
# dependencies = ["rich"]
|
|
53
53
|
# requires-python = ">=3.11"
|
|
@@ -56,7 +56,7 @@ async def test_inline_metadata_in_script():
|
|
|
56
56
|
from rich.text import Text
|
|
57
57
|
t = Text("hello")
|
|
58
58
|
print(t.plain)
|
|
59
|
-
|
|
59
|
+
"""
|
|
60
60
|
|
|
61
61
|
with tempfile.TemporaryDirectory(prefix="mcp-e2e-") as tmpdir:
|
|
62
62
|
script_path = Path(tmpdir) / "test.py"
|