safe-colab-cli 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- safe_colab_cli-0.1.0/PKG-INFO +20 -0
- safe_colab_cli-0.1.0/pyproject.toml +41 -0
- safe_colab_cli-0.1.0/safe_colab_cli/__init__.py +3 -0
- safe_colab_cli-0.1.0/safe_colab_cli/__main__.py +4 -0
- safe_colab_cli-0.1.0/safe_colab_cli/artifacts.py +181 -0
- safe_colab_cli-0.1.0/safe_colab_cli/cli.py +173 -0
- safe_colab_cli-0.1.0/safe_colab_cli/kernel.py +93 -0
- safe_colab_cli-0.1.0/safe_colab_cli/sandbox.py +73 -0
- safe_colab_cli-0.1.0/safe_colab_cli/service.py +302 -0
- safe_colab_cli-0.1.0/safe_colab_cli.egg-info/PKG-INFO +20 -0
- safe_colab_cli-0.1.0/safe_colab_cli.egg-info/SOURCES.txt +16 -0
- safe_colab_cli-0.1.0/safe_colab_cli.egg-info/dependency_links.txt +1 -0
- safe_colab_cli-0.1.0/safe_colab_cli.egg-info/entry_points.txt +2 -0
- safe_colab_cli-0.1.0/safe_colab_cli.egg-info/requires.txt +13 -0
- safe_colab_cli-0.1.0/safe_colab_cli.egg-info/top_level.txt +1 -0
- safe_colab_cli-0.1.0/setup.cfg +4 -0
- safe_colab_cli-0.1.0/tests/test_e2e.py +143 -0
- safe_colab_cli-0.1.0/tests/test_kernel.py +78 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: safe-colab-cli
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI tool for safe collaboration - sandboxed Jupyter kernel with remote Hypha service access
|
|
5
|
+
Author: Amun AI AB
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: safe-colab,sandbox,jupyter,hypha,remote-execution
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: hypha-rpc>=0.20.0
|
|
11
|
+
Requires-Dist: jupyter_client>=8.0
|
|
12
|
+
Requires-Dist: ipykernel>=6.0
|
|
13
|
+
Requires-Dist: click>=8.0
|
|
14
|
+
Requires-Dist: python-dotenv>=1.0
|
|
15
|
+
Requires-Dist: httpx>=0.24.0
|
|
16
|
+
Provides-Extra: sandbox
|
|
17
|
+
Requires-Dist: nono-py>=0.1.0; extra == "sandbox"
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
20
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "safe-colab-cli"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "CLI tool for safe collaboration - sandboxed Jupyter kernel with remote Hypha service access"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [{name = "Amun AI AB"}]
|
|
13
|
+
keywords = ["safe-colab", "sandbox", "jupyter", "hypha", "remote-execution"]
|
|
14
|
+
dependencies = [
|
|
15
|
+
"hypha-rpc>=0.20.0",
|
|
16
|
+
"jupyter_client>=8.0",
|
|
17
|
+
"ipykernel>=6.0",
|
|
18
|
+
"click>=8.0",
|
|
19
|
+
"python-dotenv>=1.0",
|
|
20
|
+
"httpx>=0.24.0",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
sandbox = [
|
|
25
|
+
"nono-py>=0.1.0",
|
|
26
|
+
]
|
|
27
|
+
dev = [
|
|
28
|
+
"pytest>=7.0",
|
|
29
|
+
"pytest-asyncio>=0.21",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
safe-colab = "safe_colab_cli.cli:main"
|
|
34
|
+
|
|
35
|
+
[tool.setuptools.packages.find]
|
|
36
|
+
where = ["."]
|
|
37
|
+
include = ["safe_colab_cli*"]
|
|
38
|
+
|
|
39
|
+
[tool.pytest.ini_options]
|
|
40
|
+
asyncio_mode = "auto"
|
|
41
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""Artifact manager integration for file sharing, logging, and auditing.
|
|
2
|
+
|
|
3
|
+
Creates a collection per user and a child artifact per session.
|
|
4
|
+
Files (plots, results, large datasets) are uploaded to the artifact manager
|
|
5
|
+
and shared via presigned URLs - no data copying needed.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import time
|
|
10
|
+
import json
|
|
11
|
+
import secrets
|
|
12
|
+
import logging
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
import httpx
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger("safe_colab_cli.artifacts")
|
|
19
|
+
|
|
20
|
+
COLLECTION_ALIAS = "safe-colab-sessions"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SessionArtifactManager:
    """Manages artifacts for a single safe-colab session.

    Creates a collection (if needed) and a session artifact within it.
    Provides upload/download via presigned URLs and audit logging.
    """

    def __init__(self, artifact_manager, session_id: str, workspace: str):
        # Hypha artifact-manager service proxy (remote RPC object).
        self._am = artifact_manager
        self.session_id = session_id
        self.workspace = workspace
        # Both populated by initialize().
        self.collection_id = None
        self.artifact_id = None
        # Full in-memory audit log; re-serialized to audit_log.jsonl on
        # every event (see log_event).
        self._log_buffer = []

    async def initialize(self):
        """Create the collection (idempotent) and session artifact."""
        # Ensure collection exists: try to read it first; on any failure,
        # assume it is missing and create it.
        try:
            collection = await self._am.read(artifact_id=COLLECTION_ALIAS)
            self.collection_id = collection["id"]
        except Exception:
            collection = await self._am.create(
                type="collection",
                alias=COLLECTION_ALIAS,
                manifest={
                    "name": "Safe Colab Sessions",
                    "description": "Collection of safe-colab session artifacts for auditing and file sharing",
                },
                config={
                    "permissions": {"@": "rw+"},
                },
                stage=True,
            )
            self.collection_id = collection["id"]
            try:
                await self._am.commit(self.collection_id)
            except Exception:
                pass  # Already committed or no staging needed

        # Create session artifact (staged until commit() is called).
        session_alias = f"session-{self.session_id[:12]}"
        artifact = await self._am.create(
            type="dataset",
            alias=session_alias,
            parent_id=self.collection_id,
            manifest={
                "name": f"Session {self.session_id[:8]}",
                "description": f"Safe Colab session started at {datetime.now(timezone.utc).isoformat()}",
                "session_id": self.session_id,
                "created_at": datetime.now(timezone.utc).isoformat(),
            },
            stage=True,
        )
        self.artifact_id = artifact["id"]
        logger.info(f"Session artifact created: {self.artifact_id}")

        # Write initial audit log entry
        await self.log_event("session_start", {"session_id": self.session_id})

    async def upload_file(self, local_path: str, remote_path: Optional[str] = None) -> str:
        """Upload a file and return a presigned download URL.

        Args:
            local_path: Local file path to upload.
            remote_path: Path within the artifact (defaults to filename).

        Returns:
            Presigned download URL for the uploaded file.
        """
        if remote_path is None:
            remote_path = os.path.basename(local_path)

        put_url = await self._am.put_file(self.artifact_id, file_path=remote_path)

        # NOTE: reads the whole file into memory; fine for session-sized
        # outputs, revisit if very large files are expected.
        async with httpx.AsyncClient() as client:
            with open(local_path, "rb") as f:
                resp = await client.put(put_url, content=f.read())
            resp.raise_for_status()

        # Get download URL
        get_url = await self._am.get_file(self.artifact_id, file_path=remote_path)

        await self.log_event("file_upload", {
            "local_path": local_path,
            "remote_path": remote_path,
            "size_bytes": os.path.getsize(local_path),
        })

        return get_url

    async def upload_bytes(self, data: bytes, remote_path: str, content_type: str = "application/octet-stream") -> str:
        """Upload raw bytes and return a presigned download URL."""
        put_url = await self._am.put_file(self.artifact_id, file_path=remote_path)

        async with httpx.AsyncClient() as client:
            resp = await client.put(put_url, content=data, headers={"Content-Type": content_type})
            resp.raise_for_status()

        get_url = await self._am.get_file(self.artifact_id, file_path=remote_path)

        await self.log_event("bytes_upload", {
            "remote_path": remote_path,
            "size_bytes": len(data),
            "content_type": content_type,
        })

        return get_url

    async def get_download_url(self, remote_path: str) -> str:
        """Get a presigned download URL for an existing file."""
        return await self._am.get_file(self.artifact_id, file_path=remote_path)

    async def list_files(self) -> list:
        """List all files in the session artifact."""
        return await self._am.list_files(self.artifact_id)

    async def log_event(self, event_type: str, details: dict):
        """Append an audit log entry to the session artifact.

        Logging is best-effort: upload failures are warned about but never
        raised, so auditing cannot break the session.
        """
        entry = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "event": event_type,
            **details,
        }
        self._log_buffer.append(entry)

        # Write log file periodically (every event for now). The full
        # buffer is re-uploaded each time because put_file overwrites.
        log_content = "\n".join(json.dumps(e) for e in self._log_buffer) + "\n"
        try:
            put_url = await self._am.put_file(self.artifact_id, file_path="audit_log.jsonl")
            async with httpx.AsyncClient() as client:
                resp = await client.put(put_url, content=log_content.encode())
                # FIX: previously the response status was never checked, so
                # HTTP-level upload failures went completely unnoticed.
                resp.raise_for_status()
        except Exception as e:
            logger.warning(f"Failed to write audit log: {e}")

    async def log_code_execution(self, code: str, result: dict):
        """Log a code execution event for auditing."""
        await self.log_event("code_execution", {
            "code_length": len(code),
            "code_preview": code[:200] + ("..." if len(code) > 200 else ""),
            "has_error": result.get("error") is not None,
            "stdout_length": len(result.get("stdout", "")),
        })

    async def log_command_execution(self, command: str, result: dict):
        """Log a shell command execution event."""
        await self.log_event("command_execution", {
            "command": command[:200],
            "returncode": result.get("returncode"),
        })

    async def commit(self, version: Optional[str] = None):
        """Commit the session artifact (makes files permanent)."""
        await self._am.commit(
            self.artifact_id,
            version=version,
            comment=f"Session {self.session_id[:8]} committed",
        )
        logger.info(f"Session artifact committed: {self.artifact_id}")
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""CLI entry point for safe-colab."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import os
|
|
5
|
+
import signal
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
from dotenv import load_dotenv
|
|
10
|
+
|
|
11
|
+
# Load .env before click parses envvar options
|
|
12
|
+
# override=True so .env takes priority over inherited env vars (e.g. from parent Svamp session)
|
|
13
|
+
load_dotenv(override=True)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@click.group()
def main():
    """Safe Colab CLI - Sandboxed Python environment for safe AI collaboration."""
    # Click group entry point; subcommands (e.g. `start`) attach themselves
    # via @main.command(). The docstring above doubles as the --help text.
    pass
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@main.command()
@click.option("--data-dir", "-d", type=click.Path(exists=True), default=None,
              help="Read-only data directory to mount (accessible as /data in sandbox)")
@click.option("--work-dir", "-w", type=click.Path(), default="./workspace",
              help="Read-write working directory (accessible as /workspace in sandbox)")
@click.option("--server-url", envvar="HYPHA_SERVER_URL", default="https://hypha.aicell.io",
              help="Hypha server URL")
@click.option("--workspace", envvar="HYPHA_WORKSPACE", default="safe-colab",
              help="Hypha workspace name")
@click.option("--token", envvar="HYPHA_TOKEN", default=None,
              help="Hypha authentication token")
@click.option("--no-sandbox", is_flag=True, default=False,
              help="Disable nono sandbox (for development/testing)")
@click.option("--timeout", type=float, default=120,
              help="Default code execution timeout in seconds")
def start(data_dir, work_dir, server_url, workspace, token, no_sandbox, timeout):
    """Start a sandboxed Safe Colab session.

    Launches a Jupyter kernel in a sandboxed environment and registers
    it as a Hypha service for remote code execution.
    """

    # A token is mandatory: the session must authenticate with Hypha.
    if not token:
        click.echo("Error: No authentication token provided. Set HYPHA_TOKEN or use --token", err=True)
        sys.exit(1)

    # Ensure work directory exists
    os.makedirs(work_dir, exist_ok=True)
    # Resolve to absolute paths before handing off (sandbox and kernel
    # setup rely on absolute paths).
    abs_work_dir = os.path.abspath(work_dir)
    abs_data_dir = os.path.abspath(data_dir) if data_dir else None

    # Print a banner summarizing the effective configuration.
    click.echo("=" * 60)
    click.echo(" Safe Colab CLI - Starting Session")
    click.echo("=" * 60)
    click.echo(f" Server: {server_url}")
    click.echo(f" Workspace: {workspace}")
    if abs_data_dir:
        click.echo(f" Data dir: {abs_data_dir} (read-only)")
    click.echo(f" Work dir: {abs_work_dir} (read-write)")
    click.echo(f" Sandbox: {'disabled' if no_sandbox else 'enabled'}")
    click.echo("=" * 60)
    click.echo()

    # Hand off to the async session loop; blocks until Ctrl+C / SIGTERM.
    asyncio.run(_run_session(
        server_url=server_url,
        workspace=workspace,
        token=token,
        data_dir=abs_data_dir,
        work_dir=abs_work_dir,
        no_sandbox=no_sandbox,
        timeout=timeout,
    ))
|
74
|
+
|
|
75
|
+
|
|
76
|
+
async def _run_session(server_url, workspace, token, data_dir, work_dir, no_sandbox, timeout):
    """Main async session loop.

    Starts the Jupyter kernel, optionally applies the sandbox, registers
    the Hypha service, then blocks until SIGINT/SIGTERM. The kernel is
    always shut down on exit, even if setup fails partway through.
    """
    from .kernel import SandboxKernel
    from .service import register_service

    kernel = SandboxKernel()

    # Prepare kernel environment
    kernel_env = {}
    if data_dir:
        kernel_env["SAFE_COLAB_DATA_DIR"] = data_dir
    kernel_env["SAFE_COLAB_WORK_DIR"] = work_dir

    # Start the Jupyter kernel
    click.echo("[1/3] Starting Jupyter kernel...")
    await kernel.start(env=kernel_env)

    # FIX: everything after kernel start is wrapped so the kernel process
    # cannot leak when sandbox setup or Hypha registration raises.
    try:
        # Set up working directories inside the kernel
        setup_code = f"""
import os, sys
# Set up directory aliases
_data_dir = {repr(data_dir) if data_dir else 'None'}
_work_dir = {repr(work_dir)}
os.chdir(_work_dir)
if _data_dir and not os.path.exists('/data'):
    # Create symlink for convenience (may fail in sandbox)
    try:
        os.symlink(_data_dir, '/data')
    except (OSError, PermissionError):
        pass
print(f"Working directory: {{os.getcwd()}}")
if _data_dir:
    print(f"Data directory: {{_data_dir}}")
    if os.path.exists(_data_dir):
        print(f"Data files: {{os.listdir(_data_dir)[:20]}}")
"""
        # FIX: `timeout` was previously accepted but never used. Apply it
        # to the setup cell. TODO: plumb it through register_service so
        # remote run_code calls use it too (they currently use the
        # kernel's own default).
        result = await kernel.execute(setup_code, timeout=timeout)
        if result["stdout"]:
            click.echo(f" {result['stdout'].strip()}")
        if result["error"]:
            click.echo(f" Warning: {result['error']['evalue']}")

        # Apply sandbox (if enabled)
        if not no_sandbox:
            click.echo("[2/3] Setting up sandbox...")
            from .sandbox import setup_sandbox
            sandboxed = setup_sandbox(data_dir, work_dir)
            if not sandboxed:
                click.echo(" Continuing without sandbox")
        else:
            click.echo("[2/3] Sandbox disabled")

        # Register Hypha service
        click.echo("[3/3] Registering Hypha service...")
        server, svc_info, instructions, service_url = await register_service(
            server_url=server_url,
            workspace=workspace,
            token=token,
            kernel=kernel,
            data_dir=data_dir or "(none)",
            work_dir=work_dir,
        )

        click.echo()
        click.echo("=" * 60)
        click.echo(" Session Ready!")
        click.echo("=" * 60)
        click.echo(f" Service URL: {service_url}")
        click.echo()
        click.echo("Copy the instructions below and paste them to your AI agent:")
        click.echo()
        click.echo("-" * 60)
        click.echo(instructions)
        click.echo("-" * 60)
        click.echo()
        click.echo("Press Ctrl+C to stop the session.")
        click.echo()

        # Keep running until interrupted
        stop_event = asyncio.Event()

        def _signal_handler():
            click.echo("\n[session] Shutting down...")
            stop_event.set()

        # FIX: get_event_loop() is deprecated inside coroutines; use the
        # running loop directly.
        loop = asyncio.get_running_loop()
        for sig in (signal.SIGINT, signal.SIGTERM):
            loop.add_signal_handler(sig, _signal_handler)

        try:
            await stop_event.wait()
        finally:
            # Restore default signal handling before cleanup.
            for sig in (signal.SIGINT, signal.SIGTERM):
                loop.remove_signal_handler(sig)
    finally:
        # Cleanup: always stop the kernel process.
        await kernel.stop()
        click.echo("[session] Session ended.")
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
if __name__ == "__main__":
    # Allow invoking this module directly (python -m safe_colab_cli.cli);
    # the packaged entry point is declared in pyproject.toml.
    main()
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Jupyter kernel manager - starts and communicates with a local IPython kernel."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import uuid
|
|
5
|
+
from jupyter_client import KernelManager
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SandboxKernel:
    """Manages a Jupyter IPython kernel for code execution."""

    def __init__(self):
        # jupyter_client.KernelManager, created by start().
        self._km = None
        # Kernel client used for message channels, created by start().
        self._kc = None

    async def start(self, env=None):
        """Start the Jupyter kernel.

        Args:
            env: Optional dict of extra environment variables to pass to
                the kernel process.
        """
        self._km = KernelManager(kernel_name="python3")
        if env:
            self._km.kernel_spec_manager  # ensure spec is loaded
            # Extra env vars passed to the kernel process
            self._km.extra_env = env
        self._km.start_kernel()
        self._kc = self._km.client()
        self._kc.start_channels()
        # wait_for_ready() blocks, so run it off the event loop thread.
        await asyncio.get_running_loop().run_in_executor(
            None, self._kc.wait_for_ready, 30
        )
        print("[kernel] IPython kernel started")

    async def execute(self, code: str, timeout: float = 120) -> dict:
        """Execute code and return results.

        Returns dict with keys: stdout, stderr, result, error, display_data
        """
        msg_id = self._kc.execute(code)
        # Output collection blocks on ZMQ reads; run it in a worker thread.
        return await asyncio.get_running_loop().run_in_executor(
            None, self._collect_output, msg_id, timeout
        )

    def _collect_output(self, msg_id: str, timeout: float) -> dict:
        """Collect all output messages for an execution request (blocking).

        FIX: ``timeout`` now bounds the TOTAL collection time. Previously
        it was applied per message, so code emitting a slow trickle of
        output could keep this loop alive indefinitely.
        """
        import time

        stdout_parts = []
        stderr_parts = []
        result = None
        error = None
        display_data = []

        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break  # total budget exhausted
            try:
                msg = self._kc.get_iopub_msg(timeout=remaining)
            except Exception:
                break  # queue empty / channel closed: stop collecting

            # Skip output belonging to other execution requests.
            if msg["parent_header"].get("msg_id") != msg_id:
                continue

            msg_type = msg["msg_type"]
            content = msg["content"]

            if msg_type == "stream":
                if content["name"] == "stdout":
                    stdout_parts.append(content["text"])
                elif content["name"] == "stderr":
                    stderr_parts.append(content["text"])
            elif msg_type == "execute_result":
                result = content["data"].get("text/plain", "")
            elif msg_type == "display_data":
                display_data.append(content["data"])
            elif msg_type == "error":
                error = {
                    "ename": content["ename"],
                    "evalue": content["evalue"],
                    "traceback": content["traceback"],
                }
            elif msg_type == "status" and content["execution_state"] == "idle":
                break  # kernel finished processing this request

        return {
            "stdout": "".join(stdout_parts),
            "stderr": "".join(stderr_parts),
            "result": result,
            "error": error,
            "display_data": display_data,
        }

    async def stop(self):
        """Shut down the kernel and close its client channels."""
        if self._kc:
            self._kc.stop_channels()
        if self._km:
            self._km.shutdown_kernel(now=True)
        print("[kernel] Kernel stopped")
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Nono sandbox integration - applies filesystem restrictions to the kernel process."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def setup_sandbox(data_dir: str, work_dir: str):
    """Apply nono sandbox to the current process.

    Grants a minimal set of filesystem capabilities:
    - data_dir: read-only
    - work_dir: read-write
    - Python/system paths: read-only (for imports to work)

    NOTE: This is irreversible. Once applied, permissions cannot be expanded.

    Returns True when the sandbox was applied, False when it could not be
    (missing dependency or unsupported platform).
    """
    try:
        from nono_py import CapabilitySet, AccessMode, apply, is_supported
    except ImportError:
        print("[sandbox] WARNING: nono_py not installed. Running WITHOUT sandbox.")
        print("[sandbox] Install with: pip install nono-py")
        return False

    if not is_supported():
        print("[sandbox] WARNING: Platform does not support sandboxing. Running WITHOUT sandbox.")
        return False

    capabilities = CapabilitySet()

    # Data directory: read-only
    if data_dir:
        data_root = os.path.abspath(data_dir)
        capabilities.allow_path(data_root, AccessMode.READ)
        print(f"[sandbox] Data dir (read-only): {data_root}")

    # Working directory: read-write
    work_root = os.path.abspath(work_dir)
    capabilities.allow_path(work_root, AccessMode.READ_WRITE)
    print(f"[sandbox] Work dir (read-write): {work_root}")

    # Allow Python paths (read-only) so imports work
    for entry in sys.path:
        if entry and os.path.isdir(entry):
            capabilities.allow_path(entry, AccessMode.READ)

    # Allow standard system paths needed by Python
    system_paths = [
        sys.prefix,
        sys.exec_prefix,
        "/usr/lib",
        "/usr/local/lib",
        "/usr/share",
        "/etc",  # needed for some configs
    ]
    system_paths.extend(
        getattr(sys, attr)
        for attr in ("base_prefix", "base_exec_prefix")
        if hasattr(sys, attr)
    )

    for candidate in system_paths:
        if candidate and os.path.isdir(candidate):
            capabilities.allow_path(candidate, AccessMode.READ)

    # Allow /tmp for temp files (read-write)
    if os.path.isdir("/tmp"):
        capabilities.allow_path("/tmp", AccessMode.READ_WRITE)

    # Don't block network - needed for Hypha communication
    # (nono allows network by default)

    apply(capabilities)
    print("[sandbox] Sandbox applied successfully")
    return True
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""Hypha service registration - exposes code execution, shell commands, and file sharing."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import secrets
|
|
5
|
+
import asyncio
|
|
6
|
+
import subprocess
|
|
7
|
+
import base64
|
|
8
|
+
import json
|
|
9
|
+
from hypha_rpc import connect_to_server
|
|
10
|
+
|
|
11
|
+
from .artifacts import SessionArtifactManager
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _build_service_url(server_url: str, service_id: str) -> str:
|
|
15
|
+
"""Build the HTTP service URL, stripping the clientId prefix."""
|
|
16
|
+
base = server_url.rstrip("/")
|
|
17
|
+
parts = service_id.split("/", 1)
|
|
18
|
+
if len(parts) == 2:
|
|
19
|
+
workspace, svc_part = parts
|
|
20
|
+
if ":" in svc_part:
|
|
21
|
+
svc_name = svc_part.split(":", 1)[1]
|
|
22
|
+
else:
|
|
23
|
+
svc_name = svc_part
|
|
24
|
+
return f"{base}/{workspace}/services/{svc_name}"
|
|
25
|
+
return f"{base}/services/{service_id}"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _build_agent_instructions(service_url: str, data_dir: str, work_dir: str) -> str:
    """Build agent instructions with curl examples for all endpoints.

    Returns a markdown document intended to be pasted to a remote AI agent.
    The unlisted ``service_url`` acts as the shared secret granting access.
    """
    # NOTE: the body is an f-string, so literal braces in the JSON examples
    # are doubled ({{ }}) and backslashes in the curl lines are escaped (\\).
    return f"""\
## Safe Colab - Remote Code Execution Environment

You have access to a sandboxed Python environment on a remote machine.
The service URL below acts as a secret — no additional token is needed.

**Service URL:** `{service_url}`

### Endpoints

#### 1. `run_code(code: str) -> dict`
Execute Python code in a persistent Jupyter kernel. Variables and imports persist across calls.
```bash
curl -s -X POST "$SERVICE_URL/run_code" \\
-H "Content-Type: application/json" \\
-d '{{"code": "import pandas as pd; df = pd.read_csv(\\"{data_dir}/patient_health.csv\\"); print(df.shape)"}}'
```
Returns: `{{"stdout": "...", "stderr": "...", "result": "...", "error": null | {{"ename": "...", "evalue": "...", "traceback": [...]}}, "display_data": [...]}}`

#### 2. `execute_command(command: str, timeout: int = 60) -> dict`
Run shell commands (pip install, ls, cat, wget, etc.).
```bash
curl -s -X POST "$SERVICE_URL/execute_command" \\
-H "Content-Type: application/json" \\
-d '{{"command": "pip install scikit-learn matplotlib"}}'
```
Returns: `{{"stdout": "...", "stderr": "...", "returncode": 0}}`

#### 3. `upload_file(file_path: str, remote_name: str = None) -> dict`
Upload a file from the working directory to the shared artifact store. Returns a presigned download URL.
```bash
curl -s -X POST "$SERVICE_URL/upload_file" \\
-H "Content-Type: application/json" \\
-d '{{"file_path": "{work_dir}/results.csv"}}'
```
Returns: `{{"url": "https://...", "remote_path": "results.csv"}}`

#### 4. `list_shared_files() -> list`
List all files uploaded to the session artifact store.

#### 5. `get_docs() -> str`
Get detailed documentation about the environment.

### Environment Details
- **Data directory** (read-only): `{data_dir}` — dataset files
- **Working directory** (read-write): `{work_dir}` — outputs, results, logs
- Full Python 3 with pip. Install packages via `execute_command("pip install <pkg>")`
- Jupyter kernel state persists across `run_code` calls
- Generated files (plots, CSVs) can be shared via `upload_file` — returns a URL

### Quick Start
```bash
SERVICE_URL="{service_url}"

# 1. Check what data is available
curl -s -X POST "$SERVICE_URL/execute_command" -H "Content-Type: application/json" \\
-d '{{"command": "ls -la {data_dir}"}}'

# 2. Install needed packages
curl -s -X POST "$SERVICE_URL/execute_command" -H "Content-Type: application/json" \\
-d '{{"command": "pip install pandas matplotlib scikit-learn"}}'

# 3. Run analysis code
curl -s -X POST "$SERVICE_URL/run_code" -H "Content-Type: application/json" \\
-d '{{"code": "import pandas as pd\\ndf = pd.read_csv(\\\"{data_dir}/patient_health.csv\\\")\\nprint(df.describe())"}}'

# 4. Save and share results
curl -s -X POST "$SERVICE_URL/run_code" -H "Content-Type: application/json" \\
-d '{{"code": "df.to_csv(\\\"{work_dir}/output.csv\\\", index=False)\\nprint(\\\"saved\\\")"}}'
curl -s -X POST "$SERVICE_URL/upload_file" -H "Content-Type: application/json" \\
-d '{{"file_path": "{work_dir}/output.csv"}}'
```

### Rules
- Do NOT access files outside the data and working directories
- The environment is sandboxed — filesystem access is restricted
- Large outputs are truncated to 50KB
- Code execution timeout: 120 seconds per cell
- All code executions and commands are logged for auditing
"""
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
async def register_service(
|
|
113
|
+
server_url: str,
|
|
114
|
+
workspace: str,
|
|
115
|
+
token: str,
|
|
116
|
+
kernel,
|
|
117
|
+
data_dir: str,
|
|
118
|
+
work_dir: str,
|
|
119
|
+
session_id: str = None,
|
|
120
|
+
):
|
|
121
|
+
"""Connect to Hypha and register the code execution service.
|
|
122
|
+
|
|
123
|
+
Registers as unlisted with a random service ID (URL-as-secret pattern).
|
|
124
|
+
Returns (server, service_info, agent_instructions, service_url).
|
|
125
|
+
"""
|
|
126
|
+
if session_id is None:
|
|
127
|
+
session_id = secrets.token_hex(16)
|
|
128
|
+
|
|
129
|
+
server = await connect_to_server({
|
|
130
|
+
"server_url": server_url,
|
|
131
|
+
"workspace": workspace,
|
|
132
|
+
"token": token,
|
|
133
|
+
})
|
|
134
|
+
actual_workspace = (
|
|
135
|
+
server.config.get("workspace", workspace)
|
|
136
|
+
if hasattr(server.config, "get")
|
|
137
|
+
else getattr(server.config, "workspace", workspace)
|
|
138
|
+
)
|
|
139
|
+
print(f"[hypha] Connected to {server_url}, workspace: {actual_workspace}")
|
|
140
|
+
|
|
141
|
+
# Initialize artifact manager for file sharing and audit logging
|
|
142
|
+
artifact_mgr = None
|
|
143
|
+
try:
|
|
144
|
+
am_service = await server.get_service("public/artifact-manager")
|
|
145
|
+
artifact_mgr = SessionArtifactManager(am_service, session_id, actual_workspace)
|
|
146
|
+
await artifact_mgr.initialize()
|
|
147
|
+
print(f"[hypha] Session artifact created for file sharing and audit logging")
|
|
148
|
+
except Exception as e:
|
|
149
|
+
print(f"[hypha] Warning: Artifact manager not available ({e}). File sharing disabled.")
|
|
150
|
+
|
|
151
|
+
# ── Endpoint: run_code ──
|
|
152
|
+
async def run_code(code: str) -> dict:
|
|
153
|
+
"""Execute Python code in the sandboxed Jupyter kernel.
|
|
154
|
+
Variables and imports persist across calls."""
|
|
155
|
+
result = await kernel.execute(code)
|
|
156
|
+
max_len = 50000
|
|
157
|
+
for key in ("stdout", "stderr"):
|
|
158
|
+
if result[key] and len(result[key]) > max_len:
|
|
159
|
+
result[key] = result[key][:max_len] + "\n... (truncated)"
|
|
160
|
+
# Audit log
|
|
161
|
+
if artifact_mgr:
|
|
162
|
+
try:
|
|
163
|
+
await artifact_mgr.log_code_execution(code, result)
|
|
164
|
+
except Exception:
|
|
165
|
+
pass
|
|
166
|
+
return result
|
|
167
|
+
|
|
168
|
+
# ── Endpoint: execute_command ──
|
|
169
|
+
# Build env with venv's bin on PATH so pip/python resolve correctly
|
|
170
|
+
import sys
|
|
171
|
+
_cmd_env = os.environ.copy()
|
|
172
|
+
_venv_bin = os.path.dirname(sys.executable)
|
|
173
|
+
_cmd_env["PATH"] = _venv_bin + os.pathsep + _cmd_env.get("PATH", "")
|
|
174
|
+
_cmd_env["VIRTUAL_ENV"] = os.path.dirname(_venv_bin)
|
|
175
|
+
|
|
176
|
+
async def execute_command(command: str, timeout: int = 60) -> dict:
    """Execute a shell command in the working directory.
    Use for pip install, file listing, system tools, etc."""
    # NOTE: shell=True with a caller-supplied string is deliberate here —
    # this endpoint's whole purpose is arbitrary command execution inside
    # the sandbox, so no escaping is applied.
    try:
        # Run the blocking subprocess off the event loop thread.
        # asyncio.to_thread (3.9+) replaces the deprecated
        # get_event_loop().run_in_executor(None, lambda: ...) pattern.
        proc = await asyncio.to_thread(
            subprocess.run,
            command,
            shell=True,
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd=work_dir,
            env=_cmd_env,
        )
        result = {"stdout": proc.stdout, "stderr": proc.stderr, "returncode": proc.returncode}
        # Truncate both streams so huge outputs do not bloat the RPC payload.
        max_len = 50000
        for key in ("stdout", "stderr"):
            if len(result[key]) > max_len:
                result[key] = result[key][:max_len] + "\n... (truncated)"
    except subprocess.TimeoutExpired:
        # Timeouts are reported as a normal result, not an exception,
        # so remote agents always get a uniform dict back.
        result = {"stdout": "", "stderr": f"Command timed out after {timeout}s", "returncode": -1}
    # Best-effort audit log; never let logging break the endpoint.
    if artifact_mgr:
        try:
            await artifact_mgr.log_command_execution(command, result)
        except Exception:
            pass
    return result
|
|
201
|
+
|
|
202
|
+
# ── Endpoint: upload_file ──
|
|
203
|
+
async def upload_file(file_path: str, remote_name: str = None) -> dict:
    """Upload a file from the working directory to the shared artifact store.
    Returns a presigned download URL for the remote agent."""
    if artifact_mgr is None:
        return {"error": "Artifact manager not available. File sharing is disabled."}

    # Security: only allow files within work_dir or data_dir.
    # realpath + commonpath is used instead of a naive startswith() prefix
    # test, which would wrongly accept sibling paths like "/work-evil" for
    # root "/work" and could be escaped via symlinks.
    abs_path = os.path.realpath(file_path)

    def _is_within(root: str) -> bool:
        # True when abs_path is root itself or located underneath it.
        real_root = os.path.realpath(root)
        try:
            return os.path.commonpath([real_root, abs_path]) == real_root
        except ValueError:
            # Raised for mixed absolute/relative paths or different drives.
            return False

    allowed = _is_within(work_dir)
    if data_dir and data_dir != "(none)":
        allowed = allowed or _is_within(data_dir)
    if not allowed:
        return {"error": "Access denied: file must be within work_dir or data_dir"}

    if not os.path.exists(abs_path):
        return {"error": f"File not found: {file_path}"}

    if remote_name is None:
        remote_name = os.path.basename(abs_path)

    try:
        url = await artifact_mgr.upload_file(abs_path, remote_name)
        return {"url": url, "remote_path": remote_name}
    except Exception as e:
        return {"error": f"Upload failed: {str(e)}"}
|
|
228
|
+
|
|
229
|
+
# ── Endpoint: list_shared_files ──
|
|
230
|
+
async def list_shared_files() -> list:
    """List all files uploaded to the session artifact store."""
    if artifact_mgr is None:
        return []
    try:
        shared = await artifact_mgr.list_files()
    except Exception:
        # Best effort: an unreachable artifact store just means "no files".
        return []
    return shared
|
|
238
|
+
|
|
239
|
+
# ── Endpoint: get_docs ──
|
|
240
|
+
async def get_docs() -> str:
    """Get documentation about the sandboxed environment."""
    # Render the environment guide with the session's concrete paths
    # interpolated, then hand it back to the remote agent.
    docs = f"""# Safe Colab Environment

## Data Directory (read-only): {data_dir}
Contains the dataset files provided by the data owner.

## Working Directory (read-write): {work_dir}
Write outputs, logs, results, and reports here.

## Available Endpoints
- `run_code(code)` — Execute Python (persistent kernel, like Jupyter)
- `execute_command(command)` — Run shell commands (pip, ls, cat, etc.)
- `upload_file(file_path)` — Upload file to artifact store, get shareable URL
- `list_shared_files()` — List uploaded files
- `get_docs()` — This documentation

## Capabilities
- Full Python 3 environment with persistent Jupyter kernel
- Install any pip package: `execute_command("pip install numpy pandas matplotlib")`
- Read data files from {data_dir}
- Write results to {work_dir}
- Share large files (plots, datasets) via `upload_file()` → returns download URL

## Tips
- Install packages FIRST with execute_command before using them in run_code
- Variables persist across run_code calls (like Jupyter cells)
- For large outputs, write to file then upload_file() instead of printing
- All operations are logged for auditing
"""
    return docs
|
|
270
|
+
|
|
271
|
+
# ── Register service ──
|
|
272
|
+
service_id = f"safe-colab-{secrets.token_hex(16)}"
|
|
273
|
+
|
|
274
|
+
svc_info = await server.register_service({
|
|
275
|
+
"id": service_id,
|
|
276
|
+
"name": "Safe Colab Sandbox",
|
|
277
|
+
"type": "code-interpreter",
|
|
278
|
+
"description": "Sandboxed Python environment for safe remote code execution",
|
|
279
|
+
"config": {
|
|
280
|
+
"visibility": "unlisted",
|
|
281
|
+
"require_context": False,
|
|
282
|
+
"run_in_executor": True,
|
|
283
|
+
},
|
|
284
|
+
"run_code": run_code,
|
|
285
|
+
"execute_command": execute_command,
|
|
286
|
+
"upload_file": upload_file,
|
|
287
|
+
"list_shared_files": list_shared_files,
|
|
288
|
+
"get_docs": get_docs,
|
|
289
|
+
})
|
|
290
|
+
|
|
291
|
+
actual_id = svc_info.get("id", service_id) if isinstance(svc_info, dict) else service_id
|
|
292
|
+
service_url = _build_service_url(server_url, actual_id)
|
|
293
|
+
|
|
294
|
+
instructions = _build_agent_instructions(
|
|
295
|
+
service_url=service_url,
|
|
296
|
+
data_dir=data_dir,
|
|
297
|
+
work_dir=work_dir,
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
print(f"[hypha] Service registered: {actual_id}")
|
|
301
|
+
print(f"[hypha] Service URL: {service_url}")
|
|
302
|
+
return server, svc_info, instructions, service_url
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: safe-colab-cli
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI tool for safe collaboration - sandboxed Jupyter kernel with remote Hypha service access
|
|
5
|
+
Author: Amun AI AB
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: safe-colab,sandbox,jupyter,hypha,remote-execution
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: hypha-rpc>=0.20.0
|
|
11
|
+
Requires-Dist: jupyter_client>=8.0
|
|
12
|
+
Requires-Dist: ipykernel>=6.0
|
|
13
|
+
Requires-Dist: click>=8.0
|
|
14
|
+
Requires-Dist: python-dotenv>=1.0
|
|
15
|
+
Requires-Dist: httpx>=0.24.0
|
|
16
|
+
Provides-Extra: sandbox
|
|
17
|
+
Requires-Dist: nono-py>=0.1.0; extra == "sandbox"
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
20
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
safe_colab_cli/__init__.py
|
|
3
|
+
safe_colab_cli/__main__.py
|
|
4
|
+
safe_colab_cli/artifacts.py
|
|
5
|
+
safe_colab_cli/cli.py
|
|
6
|
+
safe_colab_cli/kernel.py
|
|
7
|
+
safe_colab_cli/sandbox.py
|
|
8
|
+
safe_colab_cli/service.py
|
|
9
|
+
safe_colab_cli.egg-info/PKG-INFO
|
|
10
|
+
safe_colab_cli.egg-info/SOURCES.txt
|
|
11
|
+
safe_colab_cli.egg-info/dependency_links.txt
|
|
12
|
+
safe_colab_cli.egg-info/entry_points.txt
|
|
13
|
+
safe_colab_cli.egg-info/requires.txt
|
|
14
|
+
safe_colab_cli.egg-info/top_level.txt
|
|
15
|
+
tests/test_e2e.py
|
|
16
|
+
tests/test_kernel.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
safe_colab_cli
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""End-to-end test: kernel + Hypha service + artifacts + remote code execution."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import pytest
|
|
5
|
+
from dotenv import load_dotenv
|
|
6
|
+
|
|
7
|
+
from safe_colab_cli.kernel import SandboxKernel
|
|
8
|
+
from safe_colab_cli.service import register_service
|
|
9
|
+
|
|
10
|
+
# Load .env for credentials
|
|
11
|
+
load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)
|
|
12
|
+
|
|
13
|
+
SERVER_URL = os.environ.get("HYPHA_SERVER_URL", "https://hypha.aicell.io")
|
|
14
|
+
WORKSPACE = os.environ.get("HYPHA_WORKSPACE", "safe-colab")
|
|
15
|
+
TOKEN = os.environ.get("HYPHA_TOKEN")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@pytest.fixture
async def kernel():
    """Yield a running SandboxKernel, shutting it down after the test."""
    sandbox = SandboxKernel()
    await sandbox.start()
    yield sandbox
    await sandbox.stop()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@pytest.mark.asyncio
@pytest.mark.skipif(not TOKEN, reason="HYPHA_TOKEN not set")
async def test_e2e_full_workflow(kernel, tmp_path):
    """Full end-to-end: register, run code, shell commands, upload files, list files."""
    # Dedicated per-test directories; work_dir is writable, data_dir is
    # treated as the read-only dataset mount by the service.
    work_dir = str(tmp_path / "workspace")
    os.makedirs(work_dir, exist_ok=True)
    data_dir = str(tmp_path / "data")
    os.makedirs(data_dir, exist_ok=True)

    # Create test data
    with open(os.path.join(data_dir, "test.csv"), "w") as f:
        f.write("name,value\nAlice,42\nBob,17\n")

    # Register service
    # NOTE(review): hits the live Hypha server at SERVER_URL — requires
    # network access and a valid HYPHA_TOKEN (enforced by skipif above).
    server, svc_info, instructions, service_url = await register_service(
        server_url=SERVER_URL,
        workspace=WORKSPACE,
        token=TOKEN,
        kernel=kernel,
        data_dir=data_dir,
        work_dir=work_dir,
    )

    assert svc_info is not None
    assert service_url.startswith("https://")
    assert "safe-colab-" in svc_info["id"]  # Random unlisted ID
    # Agent instructions must mention every endpoint plus a curl example.
    assert "run_code" in instructions
    assert "execute_command" in instructions
    assert "upload_file" in instructions
    assert "curl" in instructions
    print(f"\nService URL: {service_url}")

    # Connect as remote agent
    # A second, independent connection simulates the remote agent's view.
    from hypha_rpc import connect_to_server as connect
    remote_server = await connect({
        "server_url": SERVER_URL,
        "workspace": WORKSPACE,
        "token": TOKEN,
    })
    svc = await remote_server.get_service(svc_info["id"])

    # Test 1: run_code - simple print
    result = await svc.run_code("print('Hello from remote!')")
    assert result["stdout"].strip() == "Hello from remote!"
    assert result["error"] is None
    print("Test 1 passed: run_code basic")

    # Test 2: run_code - expression result
    result = await svc.run_code("2 ** 10")
    assert result["result"] == "1024"
    print("Test 2 passed: run_code expression")

    # Test 3: run_code - error handling
    result = await svc.run_code("raise ValueError('test')")
    assert result["error"]["ename"] == "ValueError"
    print("Test 3 passed: run_code error")

    # Test 4: run_code - state persistence
    # Depends on the kernel surviving across calls — do not reorder.
    await svc.run_code("my_var = 'persistent_value'")
    result = await svc.run_code("print(my_var)")
    assert result["stdout"].strip() == "persistent_value"
    print("Test 4 passed: state persistence")

    # Test 5: execute_command - ls
    result = await svc.execute_command(f"ls {data_dir}")
    assert "test.csv" in result["stdout"]
    assert result["returncode"] == 0
    print("Test 5 passed: execute_command ls")

    # Test 6: execute_command - pip
    result = await svc.execute_command("python -c 'import json; print(json.dumps({\"ok\": True}))'")
    assert result["returncode"] == 0
    print("Test 6 passed: execute_command python")

    # Test 7: run_code - read CSV, write output
    # The inner f-string bakes the host-side data_dir/work_dir paths into
    # the snippet; {{...}} braces are for the snippet's own f-strings.
    code = f"""
import csv
with open("{data_dir}/test.csv") as f:
    reader = csv.DictReader(f)
    rows = list(reader)
total = sum(int(r["value"]) for r in rows)
print(f"Total: {{total}}")

# Write results
with open("{work_dir}/results.txt", "w") as f:
    f.write(f"Total: {{total}}\\nRows: {{len(rows)}}\\n")
print("Results saved")
"""
    result = await svc.run_code(code)
    # 42 + 17 from the CSV written above.
    assert "Total: 59" in result["stdout"]
    assert "Results saved" in result["stdout"]
    print("Test 7 passed: CSV read + file write")

    # Test 8: upload_file
    result = await svc.upload_file(f"{work_dir}/results.txt")
    assert "url" in result
    assert result["url"].startswith("https://")
    print(f"Test 8 passed: upload_file → {result['url'][:80]}...")

    # Test 9: list_shared_files
    files = await svc.list_shared_files()
    # Should have at least the results.txt and audit_log.jsonl
    print(f"Test 9 passed: list_shared_files → {len(files)} files")

    # Test 10: get_docs
    docs = await svc.get_docs()
    assert "run_code" in docs
    assert "execute_command" in docs
    assert "upload_file" in docs
    print(f"Test 10 passed: get_docs → {len(docs)} chars")

    # Test 11: upload_file security - reject outside work_dir
    result = await svc.upload_file("/etc/passwd")
    assert "error" in result
    assert "Access denied" in result["error"]
    print("Test 11 passed: upload_file security check")

    print("\nAll e2e tests passed!")
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Test the Jupyter kernel manager."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from safe_colab_cli.kernel import SandboxKernel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@pytest.fixture
async def kernel():
    """Provide a started SandboxKernel and stop it once the test finishes."""
    sandbox = SandboxKernel()
    await sandbox.start()
    yield sandbox
    await sandbox.stop()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@pytest.mark.asyncio
async def test_kernel_start_stop():
    """Test kernel can start and stop.

    Uses try/finally so the kernel process is always shut down, even when
    an assertion fails — otherwise a failed run would leak a live kernel.
    """
    k = SandboxKernel()
    await k.start()
    try:
        # Internal manager/client handles should exist after start().
        assert k._km is not None
        assert k._kc is not None
    finally:
        await k.stop()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.mark.asyncio
async def test_kernel_execute_simple(kernel):
    """A print statement appears on stdout with no error reported."""
    outcome = await kernel.execute("print('hello world')")
    assert outcome["error"] is None
    assert outcome["stdout"].strip() == "hello world"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@pytest.mark.asyncio
async def test_kernel_execute_result(kernel):
    """The repr of a bare expression is captured in the result field."""
    outcome = await kernel.execute("2 + 3")
    assert outcome["error"] is None
    assert outcome["result"] == "5"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@pytest.mark.asyncio
async def test_kernel_execute_error(kernel):
    """A raised exception is surfaced via the error field."""
    outcome = await kernel.execute("1/0")
    err = outcome["error"]
    assert err is not None
    assert err["ename"] == "ZeroDivisionError"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@pytest.mark.asyncio
async def test_kernel_execute_multiline(kernel):
    """A multi-statement snippet executes as a single cell."""
    snippet = """
import os
cwd = os.getcwd()
print(f"cwd: {cwd}")
"""
    outcome = await kernel.execute(snippet)
    assert outcome["error"] is None
    assert "cwd:" in outcome["stdout"]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@pytest.mark.asyncio
async def test_kernel_execute_stderr(kernel):
    """Output written to sys.stderr is captured on the stderr stream."""
    snippet = """
import sys
print("error msg", file=sys.stderr)
"""
    outcome = await kernel.execute(snippet)
    assert "error msg" in outcome["stderr"]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@pytest.mark.asyncio
async def test_kernel_state_persistence(kernel):
    """A variable bound in one execution is still visible in the next."""
    await kernel.execute("x = 42")
    outcome = await kernel.execute("print(x)")
    assert outcome["stdout"].strip() == "42"