safe-colab-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.4
2
+ Name: safe-colab-cli
3
+ Version: 0.1.0
4
+ Summary: CLI tool for safe collaboration - sandboxed Jupyter kernel with remote Hypha service access
5
+ Author: Amun AI AB
6
+ License: MIT
7
+ Keywords: safe-colab,sandbox,jupyter,hypha,remote-execution
8
+ Requires-Python: >=3.9
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: hypha-rpc>=0.20.0
11
+ Requires-Dist: jupyter_client>=8.0
12
+ Requires-Dist: ipykernel>=6.0
13
+ Requires-Dist: click>=8.0
14
+ Requires-Dist: python-dotenv>=1.0
15
+ Requires-Dist: httpx>=0.24.0
16
+ Provides-Extra: sandbox
17
+ Requires-Dist: nono-py>=0.1.0; extra == "sandbox"
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest>=7.0; extra == "dev"
20
+ Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
@@ -0,0 +1,41 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "safe-colab-cli"
7
+ version = "0.1.0"
8
+ description = "CLI tool for safe collaboration - sandboxed Jupyter kernel with remote Hypha service access"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.9"
12
+ authors = [{name = "Amun AI AB"}]
13
+ keywords = ["safe-colab", "sandbox", "jupyter", "hypha", "remote-execution"]
14
+ dependencies = [
15
+ "hypha-rpc>=0.20.0",
16
+ "jupyter_client>=8.0",
17
+ "ipykernel>=6.0",
18
+ "click>=8.0",
19
+ "python-dotenv>=1.0",
20
+ "httpx>=0.24.0",
21
+ ]
22
+
23
+ [project.optional-dependencies]
24
+ sandbox = [
25
+ "nono-py>=0.1.0",
26
+ ]
27
+ dev = [
28
+ "pytest>=7.0",
29
+ "pytest-asyncio>=0.21",
30
+ ]
31
+
32
+ [project.scripts]
33
+ safe-colab = "safe_colab_cli.cli:main"
34
+
35
+ [tool.setuptools.packages.find]
36
+ where = ["."]
37
+ include = ["safe_colab_cli*"]
38
+
39
+ [tool.pytest.ini_options]
40
+ asyncio_mode = "auto"
41
+ testpaths = ["tests"]
@@ -0,0 +1,3 @@
1
+ """Safe Colab CLI - Sandboxed Jupyter kernel with remote Hypha service access."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,4 @@
1
+ """Allow running as python -m safe_colab_cli."""
2
+ from .cli import main
3
+
4
+ main()
@@ -0,0 +1,181 @@
1
+ """Artifact manager integration for file sharing, logging, and auditing.
2
+
3
+ Creates a collection per user and a child artifact per session.
4
+ Files (plots, results, large datasets) are uploaded to the artifact manager
5
+ and shared via presigned URLs - no data copying needed.
6
+ """
7
+
8
+ import os
9
+ import time
10
+ import json
11
+ import secrets
12
+ import logging
13
+ from datetime import datetime, timezone
14
+ from typing import Optional
15
+
16
+ import httpx
17
+
18
+ logger = logging.getLogger("safe_colab_cli.artifacts")
19
+
20
+ COLLECTION_ALIAS = "safe-colab-sessions"
21
+
22
+
23
class SessionArtifactManager:
    """Manages artifacts for a single safe-colab session.

    Creates a collection (if needed) and a session artifact within it.
    Provides upload/download via presigned URLs and audit logging.
    """

    def __init__(self, artifact_manager, session_id: str, workspace: str):
        # Remote artifact-manager service proxy (Hypha service object).
        self._am = artifact_manager
        self.session_id = session_id
        self.workspace = workspace
        self.collection_id = None  # set by initialize()
        self.artifact_id = None    # set by initialize()
        # Full in-memory audit trail; rewritten to audit_log.jsonl on each event.
        self._log_buffer = []

    async def initialize(self):
        """Create the collection (idempotent) and session artifact.

        Must be called once before any upload/log method is used.
        """
        # Ensure the shared collection exists; read() raises when it does not.
        try:
            collection = await self._am.read(artifact_id=COLLECTION_ALIAS)
            self.collection_id = collection["id"]
        except Exception:
            collection = await self._am.create(
                type="collection",
                alias=COLLECTION_ALIAS,
                manifest={
                    "name": "Safe Colab Sessions",
                    "description": "Collection of safe-colab session artifacts for auditing and file sharing",
                },
                config={
                    "permissions": {"@": "rw+"},
                },
                stage=True,
            )
            self.collection_id = collection["id"]
            try:
                await self._am.commit(self.collection_id)
            except Exception:
                pass  # Already committed or no staging needed

        # Create the per-session artifact inside the collection.
        session_alias = f"session-{self.session_id[:12]}"
        artifact = await self._am.create(
            type="dataset",
            alias=session_alias,
            parent_id=self.collection_id,
            manifest={
                "name": f"Session {self.session_id[:8]}",
                "description": f"Safe Colab session started at {datetime.now(timezone.utc).isoformat()}",
                "session_id": self.session_id,
                "created_at": datetime.now(timezone.utc).isoformat(),
            },
            stage=True,
        )
        self.artifact_id = artifact["id"]
        logger.info(f"Session artifact created: {self.artifact_id}")

        # Write initial audit log entry
        await self.log_event("session_start", {"session_id": self.session_id})

    async def upload_file(self, local_path: str, remote_path: Optional[str] = None) -> str:
        """Upload a file and return a presigned download URL.

        Args:
            local_path: Local file path to upload.
            remote_path: Path within the artifact (defaults to the basename).

        Returns:
            Presigned download URL for the uploaded file.

        Raises:
            httpx.HTTPStatusError: if the PUT to the presigned URL fails.
            OSError: if the local file cannot be read.
        """
        if remote_path is None:
            remote_path = os.path.basename(local_path)

        put_url = await self._am.put_file(self.artifact_id, file_path=remote_path)

        # NOTE: the whole file is read into memory; fine for typical session
        # outputs, but very large files would need streaming.
        async with httpx.AsyncClient() as client:
            with open(local_path, "rb") as f:
                resp = await client.put(put_url, content=f.read())
            resp.raise_for_status()

        # Presigned download URL used for sharing with the remote agent.
        get_url = await self._am.get_file(self.artifact_id, file_path=remote_path)

        await self.log_event("file_upload", {
            "local_path": local_path,
            "remote_path": remote_path,
            "size_bytes": os.path.getsize(local_path),
        })

        return get_url

    async def upload_bytes(self, data: bytes, remote_path: str, content_type: str = "application/octet-stream") -> str:
        """Upload raw bytes and return a presigned download URL."""
        put_url = await self._am.put_file(self.artifact_id, file_path=remote_path)

        async with httpx.AsyncClient() as client:
            resp = await client.put(put_url, content=data, headers={"Content-Type": content_type})
            resp.raise_for_status()

        get_url = await self._am.get_file(self.artifact_id, file_path=remote_path)

        await self.log_event("bytes_upload", {
            "remote_path": remote_path,
            "size_bytes": len(data),
            "content_type": content_type,
        })

        return get_url

    async def get_download_url(self, remote_path: str) -> str:
        """Get a presigned download URL for an existing file."""
        return await self._am.get_file(self.artifact_id, file_path=remote_path)

    async def list_files(self) -> list:
        """List all files in the session artifact."""
        return await self._am.list_files(self.artifact_id)

    async def log_event(self, event_type: str, details: dict):
        """Append an audit log entry and persist the log (best-effort).

        The complete buffer is rewritten to audit_log.jsonl on every event;
        cost grows with event count, which is acceptable for session-sized
        logs.
        """
        entry = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "event": event_type,
            **details,
        }
        self._log_buffer.append(entry)

        log_content = "\n".join(json.dumps(e) for e in self._log_buffer) + "\n"
        try:
            put_url = await self._am.put_file(self.artifact_id, file_path="audit_log.jsonl")
            async with httpx.AsyncClient() as client:
                resp = await client.put(put_url, content=log_content.encode())
                # Bug fix: the response was previously discarded, so a failed
                # PUT (e.g. 403/500) never reached the warning below and the
                # audit write silently appeared to succeed.
                resp.raise_for_status()
        except Exception as e:
            logger.warning(f"Failed to write audit log: {e}")

    async def log_code_execution(self, code: str, result: dict):
        """Log a code execution event for auditing."""
        await self.log_event("code_execution", {
            "code_length": len(code),
            "code_preview": code[:200] + ("..." if len(code) > 200 else ""),
            "has_error": result.get("error") is not None,
            "stdout_length": len(result.get("stdout", "")),
        })

    async def log_command_execution(self, command: str, result: dict):
        """Log a shell command execution event."""
        await self.log_event("command_execution", {
            "command": command[:200],
            "returncode": result.get("returncode"),
        })

    async def commit(self, version: Optional[str] = None):
        """Commit the session artifact (makes files permanent)."""
        await self._am.commit(
            self.artifact_id,
            version=version,
            comment=f"Session {self.session_id[:8]} committed",
        )
        logger.info(f"Session artifact committed: {self.artifact_id}")
@@ -0,0 +1,173 @@
1
+ """CLI entry point for safe-colab."""
2
+
3
+ import asyncio
4
+ import os
5
+ import signal
6
+ import sys
7
+
8
+ import click
9
+ from dotenv import load_dotenv
10
+
11
# Load .env before click parses envvar options.
# override=True so values in .env win over environment variables inherited
# from the parent process (e.g. a parent Svamp session).
load_dotenv(override=True)
14
+
15
+
16
@click.group()
def main():
    """Safe Colab CLI - Sandboxed Python environment for safe AI collaboration."""
20
+
21
+
22
@main.command()
@click.option("--data-dir", "-d", type=click.Path(exists=True), default=None,
              help="Read-only data directory to mount (accessible as /data in sandbox)")
@click.option("--work-dir", "-w", type=click.Path(), default="./workspace",
              help="Read-write working directory (accessible as /workspace in sandbox)")
@click.option("--server-url", envvar="HYPHA_SERVER_URL", default="https://hypha.aicell.io",
              help="Hypha server URL")
@click.option("--workspace", envvar="HYPHA_WORKSPACE", default="safe-colab",
              help="Hypha workspace name")
@click.option("--token", envvar="HYPHA_TOKEN", default=None,
              help="Hypha authentication token")
@click.option("--no-sandbox", is_flag=True, default=False,
              help="Disable nono sandbox (for development/testing)")
@click.option("--timeout", type=float, default=120,
              help="Default code execution timeout in seconds")
def start(data_dir, work_dir, server_url, workspace, token, no_sandbox, timeout):
    """Start a sandboxed Safe Colab session.

    Launches a Jupyter kernel in a sandboxed environment and registers
    it as a Hypha service for remote code execution.
    """
    # A token is mandatory: the Hypha connection cannot be made without it.
    if not token:
        click.echo("Error: No authentication token provided. Set HYPHA_TOKEN or use --token", err=True)
        sys.exit(1)

    os.makedirs(work_dir, exist_ok=True)
    resolved_work = os.path.abspath(work_dir)
    resolved_data = os.path.abspath(data_dir) if data_dir else None

    # Startup banner summarizing the session configuration.
    banner = "=" * 60
    click.echo(banner)
    click.echo(" Safe Colab CLI - Starting Session")
    click.echo(banner)
    click.echo(f" Server: {server_url}")
    click.echo(f" Workspace: {workspace}")
    if resolved_data:
        click.echo(f" Data dir: {resolved_data} (read-only)")
    click.echo(f" Work dir: {resolved_work} (read-write)")
    click.echo(f" Sandbox: {'disabled' if no_sandbox else 'enabled'}")
    click.echo(banner)
    click.echo()

    asyncio.run(_run_session(
        server_url=server_url,
        workspace=workspace,
        token=token,
        data_dir=resolved_data,
        work_dir=resolved_work,
        no_sandbox=no_sandbox,
        timeout=timeout,
    ))
74
+
75
+
76
async def _run_session(server_url, workspace, token, data_dir, work_dir, no_sandbox, timeout):
    """Main async session loop.

    Starts the Jupyter kernel, optionally applies the sandbox, registers the
    Hypha service, then waits for SIGINT/SIGTERM before shutting down.

    NOTE(review): ``timeout`` is accepted from the CLI but not currently
    forwarded anywhere - the kernel/service use their own defaults. Confirm
    whether it should be threaded through to kernel.execute().
    """
    from .kernel import SandboxKernel
    from .service import register_service

    kernel = SandboxKernel()

    # Prepare kernel environment
    kernel_env = {}
    if data_dir:
        kernel_env["SAFE_COLAB_DATA_DIR"] = data_dir
    kernel_env["SAFE_COLAB_WORK_DIR"] = work_dir

    # Start the Jupyter kernel
    click.echo("[1/3] Starting Jupyter kernel...")
    await kernel.start(env=kernel_env)

    # Set up working directories inside the kernel. The snippet below runs
    # *inside* the kernel process; {{...}} braces are escaped for the f-string.
    setup_code = f"""
import os, sys
# Set up directory aliases
_data_dir = {repr(data_dir) if data_dir else 'None'}
_work_dir = {repr(work_dir)}
os.chdir(_work_dir)
if _data_dir and not os.path.exists('/data'):
    # Create symlink for convenience (may fail in sandbox)
    try:
        os.symlink(_data_dir, '/data')
    except (OSError, PermissionError):
        pass
print(f"Working directory: {{os.getcwd()}}")
if _data_dir:
    print(f"Data directory: {{_data_dir}}")
    if os.path.exists(_data_dir):
        print(f"Data files: {{os.listdir(_data_dir)[:20]}}")
"""
    result = await kernel.execute(setup_code)
    if result["stdout"]:
        click.echo(f" {result['stdout'].strip()}")
    if result["error"]:
        click.echo(f" Warning: {result['error']['evalue']}")

    # Apply sandbox (if enabled)
    if not no_sandbox:
        click.echo("[2/3] Setting up sandbox...")
        from .sandbox import setup_sandbox
        sandboxed = setup_sandbox(data_dir, work_dir)
        if not sandboxed:
            click.echo(" Continuing without sandbox")
    else:
        click.echo("[2/3] Sandbox disabled")

    # Register Hypha service
    click.echo("[3/3] Registering Hypha service...")
    server, svc_info, instructions, service_url = await register_service(
        server_url=server_url,
        workspace=workspace,
        token=token,
        kernel=kernel,
        data_dir=data_dir or "(none)",
        work_dir=work_dir,
    )

    click.echo()
    click.echo("=" * 60)
    click.echo(" Session Ready!")
    click.echo("=" * 60)
    click.echo(f" Service URL: {service_url}")
    click.echo()
    click.echo("Copy the instructions below and paste them to your AI agent:")
    click.echo()
    click.echo("-" * 60)
    click.echo(instructions)
    click.echo("-" * 60)
    click.echo()
    click.echo("Press Ctrl+C to stop the session.")
    click.echo()

    # Keep running until interrupted
    stop_event = asyncio.Event()

    def _signal_handler():
        click.echo("\n[session] Shutting down...")
        stop_event.set()

    # Bug fix: asyncio.get_event_loop() is deprecated inside a running loop;
    # use get_running_loop() (we are inside asyncio.run()).
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGINT, signal.SIGTERM):
        try:
            loop.add_signal_handler(sig, _signal_handler)
        except NotImplementedError:
            # add_signal_handler is unavailable on Windows event loops; fall
            # back to a plain signal handler that trips the event safely.
            signal.signal(sig, lambda *_: loop.call_soon_threadsafe(_signal_handler))

    await stop_event.wait()

    # Cleanup
    await kernel.stop()
    click.echo("[session] Session ended.")
170
+
171
+
172
# Allow direct execution of this module as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,93 @@
1
+ """Jupyter kernel manager - starts and communicates with a local IPython kernel."""
2
+
3
+ import asyncio
4
+ import uuid
5
+ from jupyter_client import KernelManager
6
+
7
+
8
class SandboxKernel:
    """Manages a Jupyter IPython kernel for code execution."""

    def __init__(self):
        self._km = None  # jupyter_client.KernelManager once started
        self._kc = None  # blocking kernel client once started

    async def start(self, env=None):
        """Start the Jupyter kernel.

        Args:
            env: Optional mapping of extra environment variables for the
                kernel process, merged on top of the current environment.
        """
        import os  # local: only needed when building the kernel environment

        self._km = KernelManager(kernel_name="python3")
        launch_kwargs = {}
        if env:
            # Bug fix: the old code assigned `self._km.extra_env`, an
            # attribute KernelManager never reads, so the variables silently
            # never reached the kernel. jupyter_client forwards `env` from
            # start_kernel() to the kernel launcher, so pass a merged
            # environment there instead.
            launch_kwargs["env"] = {**os.environ, **env}
        self._km.start_kernel(**launch_kwargs)
        self._kc = self._km.client()
        self._kc.start_channels()
        # wait_for_ready() blocks; run it off the event loop thread.
        await asyncio.get_running_loop().run_in_executor(
            None, self._kc.wait_for_ready, 30
        )
        print("[kernel] IPython kernel started")

    async def execute(self, code: str, timeout: float = 120) -> dict:
        """Execute code and return results.

        Args:
            code: Python source to run in the kernel.
            timeout: Overall deadline in seconds for collecting all output.

        Returns dict with keys: stdout, stderr, result, error, display_data
        """
        msg_id = self._kc.execute(code)
        return await asyncio.get_running_loop().run_in_executor(
            None, self._collect_output, msg_id, timeout
        )

    def _collect_output(self, msg_id: str, timeout: float) -> dict:
        """Collect all output messages for an execution request (blocking).

        Bug fix: `timeout` is now an overall deadline. Previously it was
        applied per message, so a steady trickle of messages could block far
        longer than the requested timeout.
        """
        from time import monotonic

        stdout_parts = []
        stderr_parts = []
        result = None
        error = None
        display_data = []

        deadline = monotonic() + timeout
        while True:
            remaining = deadline - monotonic()
            if remaining <= 0:
                break  # overall timeout exhausted
            try:
                msg = self._kc.get_iopub_msg(timeout=remaining)
            except Exception:
                break  # queue empty (timed out) or channel closed

            # Ignore messages that belong to other executions.
            if msg["parent_header"].get("msg_id") != msg_id:
                continue

            msg_type = msg["msg_type"]
            content = msg["content"]

            if msg_type == "stream":
                if content["name"] == "stdout":
                    stdout_parts.append(content["text"])
                elif content["name"] == "stderr":
                    stderr_parts.append(content["text"])
            elif msg_type == "execute_result":
                result = content["data"].get("text/plain", "")
            elif msg_type == "display_data":
                display_data.append(content["data"])
            elif msg_type == "error":
                error = {
                    "ename": content["ename"],
                    "evalue": content["evalue"],
                    "traceback": content["traceback"],
                }
            elif msg_type == "status" and content["execution_state"] == "idle":
                break  # kernel finished processing this request

        return {
            "stdout": "".join(stdout_parts),
            "stderr": "".join(stderr_parts),
            "result": result,
            "error": error,
            "display_data": display_data,
        }

    async def stop(self):
        """Shut down the client channels and the kernel process."""
        if self._kc:
            self._kc.stop_channels()
        if self._km:
            self._km.shutdown_kernel(now=True)
        print("[kernel] Kernel stopped")
@@ -0,0 +1,73 @@
1
+ """Nono sandbox integration - applies filesystem restrictions to the kernel process."""
2
+
3
+ import os
4
+ import sys
5
+
6
+
7
def setup_sandbox(data_dir: "str | None", work_dir: str):
    """Apply the nono sandbox to the current process.

    Restricts filesystem access to:
      - data_dir: read-only (optional; may be None)
      - work_dir: read-write
      - Python/system paths: read-only (so imports keep working)
      - /tmp: read-write (temp files)

    Returns:
        True if the sandbox was applied, False if unavailable (nono_py not
        installed or unsupported platform).

    NOTE: This is irreversible. Once applied, permissions cannot be expanded.
    """
    try:
        from nono_py import CapabilitySet, AccessMode, apply, is_supported
    except ImportError:
        print("[sandbox] WARNING: nono_py not installed. Running WITHOUT sandbox.")
        print("[sandbox] Install with: pip install nono-py")
        return False

    if not is_supported():
        print("[sandbox] WARNING: Platform does not support sandboxing. Running WITHOUT sandbox.")
        return False

    caps = CapabilitySet()

    # Data directory: read-only
    if data_dir:
        abs_data = os.path.abspath(data_dir)
        caps.allow_path(abs_data, AccessMode.READ)
        print(f"[sandbox] Data dir (read-only): {abs_data}")

    # Working directory: read-write
    abs_work = os.path.abspath(work_dir)
    caps.allow_path(abs_work, AccessMode.READ_WRITE)
    print(f"[sandbox] Work dir (read-write): {abs_work}")

    # Read-only paths Python needs: sys.path entries, interpreter prefixes,
    # and common system locations. sys.path usually overlaps the prefixes,
    # so deduplicate (order-preserving) to grant each path only once.
    readonly_candidates = list(sys.path) + [
        sys.prefix,
        sys.exec_prefix,
        getattr(sys, "base_prefix", ""),
        getattr(sys, "base_exec_prefix", ""),
        "/usr/lib",
        "/usr/local/lib",
        "/usr/share",
        "/etc",  # needed for some configs
    ]
    seen = set()
    for path in readonly_candidates:
        if path and path not in seen and os.path.isdir(path):
            seen.add(path)
            caps.allow_path(path, AccessMode.READ)

    # Allow /tmp for temp files (read-write)
    if os.path.isdir("/tmp"):
        caps.allow_path("/tmp", AccessMode.READ_WRITE)

    # Don't block network - needed for Hypha communication
    # (nono allows network by default)

    apply(caps)
    print("[sandbox] Sandbox applied successfully")
    return True
@@ -0,0 +1,302 @@
1
+ """Hypha service registration - exposes code execution, shell commands, and file sharing."""
2
+
3
+ import os
4
+ import secrets
5
+ import asyncio
6
+ import subprocess
7
+ import base64
8
+ import json
9
+ from hypha_rpc import connect_to_server
10
+
11
+ from .artifacts import SessionArtifactManager
12
+
13
+
14
+ def _build_service_url(server_url: str, service_id: str) -> str:
15
+ """Build the HTTP service URL, stripping the clientId prefix."""
16
+ base = server_url.rstrip("/")
17
+ parts = service_id.split("/", 1)
18
+ if len(parts) == 2:
19
+ workspace, svc_part = parts
20
+ if ":" in svc_part:
21
+ svc_name = svc_part.split(":", 1)[1]
22
+ else:
23
+ svc_name = svc_part
24
+ return f"{base}/{workspace}/services/{svc_name}"
25
+ return f"{base}/services/{service_id}"
26
+
27
+
28
def _build_agent_instructions(service_url: str, data_dir: str, work_dir: str) -> str:
    """Build agent instructions with curl examples for all endpoints.

    The returned markdown is meant to be pasted verbatim to an AI agent; it
    treats the unlisted, randomly-suffixed service URL as the only secret.
    Double braces in the template are literal JSON braces in the output.
    """
    return f"""\
## Safe Colab - Remote Code Execution Environment

You have access to a sandboxed Python environment on a remote machine.
The service URL below acts as a secret — no additional token is needed.

**Service URL:** `{service_url}`

### Endpoints

#### 1. `run_code(code: str) -> dict`
Execute Python code in a persistent Jupyter kernel. Variables and imports persist across calls.
```bash
curl -s -X POST "$SERVICE_URL/run_code" \\
  -H "Content-Type: application/json" \\
  -d '{{"code": "import pandas as pd; df = pd.read_csv(\\"{data_dir}/patient_health.csv\\"); print(df.shape)"}}'
```
Returns: `{{"stdout": "...", "stderr": "...", "result": "...", "error": null | {{"ename": "...", "evalue": "...", "traceback": [...]}}, "display_data": [...]}}`

#### 2. `execute_command(command: str, timeout: int = 60) -> dict`
Run shell commands (pip install, ls, cat, wget, etc.).
```bash
curl -s -X POST "$SERVICE_URL/execute_command" \\
  -H "Content-Type: application/json" \\
  -d '{{"command": "pip install scikit-learn matplotlib"}}'
```
Returns: `{{"stdout": "...", "stderr": "...", "returncode": 0}}`

#### 3. `upload_file(file_path: str, remote_name: str = None) -> dict`
Upload a file from the working directory to the shared artifact store. Returns a presigned download URL.
```bash
curl -s -X POST "$SERVICE_URL/upload_file" \\
  -H "Content-Type: application/json" \\
  -d '{{"file_path": "{work_dir}/results.csv"}}'
```
Returns: `{{"url": "https://...", "remote_path": "results.csv"}}`

#### 4. `list_shared_files() -> list`
List all files uploaded to the session artifact store.

#### 5. `get_docs() -> str`
Get detailed documentation about the environment.

### Environment Details
- **Data directory** (read-only): `{data_dir}` — dataset files
- **Working directory** (read-write): `{work_dir}` — outputs, results, logs
- Full Python 3 with pip. Install packages via `execute_command("pip install <pkg>")`
- Jupyter kernel state persists across `run_code` calls
- Generated files (plots, CSVs) can be shared via `upload_file` — returns a URL

### Quick Start
```bash
SERVICE_URL="{service_url}"

# 1. Check what data is available
curl -s -X POST "$SERVICE_URL/execute_command" -H "Content-Type: application/json" \\
  -d '{{"command": "ls -la {data_dir}"}}'

# 2. Install needed packages
curl -s -X POST "$SERVICE_URL/execute_command" -H "Content-Type: application/json" \\
  -d '{{"command": "pip install pandas matplotlib scikit-learn"}}'

# 3. Run analysis code
curl -s -X POST "$SERVICE_URL/run_code" -H "Content-Type: application/json" \\
  -d '{{"code": "import pandas as pd\\ndf = pd.read_csv(\\\"{data_dir}/patient_health.csv\\\")\\nprint(df.describe())"}}'

# 4. Save and share results
curl -s -X POST "$SERVICE_URL/run_code" -H "Content-Type: application/json" \\
  -d '{{"code": "df.to_csv(\\\"{work_dir}/output.csv\\\", index=False)\\nprint(\\\"saved\\\")"}}'
curl -s -X POST "$SERVICE_URL/upload_file" -H "Content-Type: application/json" \\
  -d '{{"file_path": "{work_dir}/output.csv"}}'
```

### Rules
- Do NOT access files outside the data and working directories
- The environment is sandboxed — filesystem access is restricted
- Large outputs are truncated to 50KB
- Code execution timeout: 120 seconds per cell
- All code executions and commands are logged for auditing
"""
110
+
111
+
112
async def register_service(
    server_url: str,
    workspace: str,
    token: str,
    kernel,
    data_dir: str,
    work_dir: str,
    session_id: str = None,
):
    """Connect to Hypha and register the code execution service.

    Registers as unlisted with a random service ID (URL-as-secret pattern).

    Args:
        server_url: Hypha server URL.
        workspace: Hypha workspace name.
        token: Authentication token for the workspace.
        kernel: Started SandboxKernel used to execute code.
        data_dir: Read-only data directory path, or "(none)".
        work_dir: Read-write working directory path.
        session_id: Optional session id; a random one is generated if omitted.

    Returns:
        Tuple of (server, service_info, agent_instructions, service_url).
    """
    if session_id is None:
        session_id = secrets.token_hex(16)

    server = await connect_to_server({
        "server_url": server_url,
        "workspace": workspace,
        "token": token,
    })
    # server.config may be dict-like or attribute-style depending on the
    # hypha-rpc version; support both.
    actual_workspace = (
        server.config.get("workspace", workspace)
        if hasattr(server.config, "get")
        else getattr(server.config, "workspace", workspace)
    )
    print(f"[hypha] Connected to {server_url}, workspace: {actual_workspace}")

    # Initialize artifact manager for file sharing and audit logging.
    # Best-effort: the service still works without it (file sharing disabled).
    artifact_mgr = None
    try:
        am_service = await server.get_service("public/artifact-manager")
        artifact_mgr = SessionArtifactManager(am_service, session_id, actual_workspace)
        await artifact_mgr.initialize()
        # fix: was an f-string with no placeholders
        print("[hypha] Session artifact created for file sharing and audit logging")
    except Exception as e:
        print(f"[hypha] Warning: Artifact manager not available ({e}). File sharing disabled.")

    # ── Endpoint: run_code ──
    async def run_code(code: str) -> dict:
        """Execute Python code in the sandboxed Jupyter kernel.
        Variables and imports persist across calls."""
        result = await kernel.execute(code)
        # Truncate huge streams so responses stay transferable.
        max_len = 50000
        for key in ("stdout", "stderr"):
            if result[key] and len(result[key]) > max_len:
                result[key] = result[key][:max_len] + "\n... (truncated)"
        # Audit log (best-effort; never fail the call because logging failed)
        if artifact_mgr:
            try:
                await artifact_mgr.log_code_execution(code, result)
            except Exception:
                pass
        return result

    # ── Endpoint: execute_command ──
    # Build env with venv's bin on PATH so pip/python resolve correctly
    import sys
    _cmd_env = os.environ.copy()
    _venv_bin = os.path.dirname(sys.executable)
    _cmd_env["PATH"] = _venv_bin + os.pathsep + _cmd_env.get("PATH", "")
    _cmd_env["VIRTUAL_ENV"] = os.path.dirname(_venv_bin)

    async def execute_command(command: str, timeout: int = 60) -> dict:
        """Execute a shell command in the working directory.
        Use for pip install, file listing, system tools, etc."""
        # SECURITY NOTE: shell=True with caller-supplied input is intentional -
        # running arbitrary commands is this endpoint's purpose; confinement
        # comes from the sandbox, not from input sanitization.
        try:
            # fix: get_event_loop() is deprecated inside a running loop
            proc = await asyncio.get_running_loop().run_in_executor(
                None,
                lambda: subprocess.run(
                    command, shell=True, capture_output=True, text=True,
                    timeout=timeout, cwd=work_dir, env=_cmd_env,
                ),
            )
            result = {"stdout": proc.stdout, "stderr": proc.stderr, "returncode": proc.returncode}
            max_len = 50000
            for key in ("stdout", "stderr"):
                if len(result[key]) > max_len:
                    result[key] = result[key][:max_len] + "\n... (truncated)"
        except subprocess.TimeoutExpired:
            result = {"stdout": "", "stderr": f"Command timed out after {timeout}s", "returncode": -1}
        # Audit log
        if artifact_mgr:
            try:
                await artifact_mgr.log_command_execution(command, result)
            except Exception:
                pass
        return result

    # ── Endpoint: upload_file ──
    async def upload_file(file_path: str, remote_name: str = None) -> dict:
        """Upload a file from the working directory to the shared artifact store.
        Returns a presigned download URL for the remote agent."""
        if artifact_mgr is None:
            return {"error": "Artifact manager not available. File sharing is disabled."}

        # Security: only allow files within work_dir or data_dir
        abs_path = os.path.abspath(file_path)
        allowed = abs_path.startswith(os.path.abspath(work_dir))
        if data_dir and data_dir != "(none)":
            allowed = allowed or abs_path.startswith(os.path.abspath(data_dir))
        if not allowed:
            # fix: was an f-string with no placeholders
            return {"error": "Access denied: file must be within work_dir or data_dir"}

        if not os.path.exists(abs_path):
            return {"error": f"File not found: {file_path}"}

        if remote_name is None:
            remote_name = os.path.basename(abs_path)

        try:
            url = await artifact_mgr.upload_file(abs_path, remote_name)
            return {"url": url, "remote_path": remote_name}
        except Exception as e:
            return {"error": f"Upload failed: {str(e)}"}

    # ── Endpoint: list_shared_files ──
    async def list_shared_files() -> list:
        """List all files uploaded to the session artifact store."""
        if artifact_mgr is None:
            return []
        try:
            return await artifact_mgr.list_files()
        except Exception:
            return []

    # ── Endpoint: get_docs ──
    async def get_docs() -> str:
        """Get documentation about the sandboxed environment."""
        return f"""# Safe Colab Environment

## Data Directory (read-only): {data_dir}
Contains the dataset files provided by the data owner.

## Working Directory (read-write): {work_dir}
Write outputs, logs, results, and reports here.

## Available Endpoints
- `run_code(code)` — Execute Python (persistent kernel, like Jupyter)
- `execute_command(command)` — Run shell commands (pip, ls, cat, etc.)
- `upload_file(file_path)` — Upload file to artifact store, get shareable URL
- `list_shared_files()` — List uploaded files
- `get_docs()` — This documentation

## Capabilities
- Full Python 3 environment with persistent Jupyter kernel
- Install any pip package: `execute_command("pip install numpy pandas matplotlib")`
- Read data files from {data_dir}
- Write results to {work_dir}
- Share large files (plots, datasets) via `upload_file()` → returns download URL

## Tips
- Install packages FIRST with execute_command before using them in run_code
- Variables persist across run_code calls (like Jupyter cells)
- For large outputs, write to file then upload_file() instead of printing
- All operations are logged for auditing
"""

    # ── Register service ──
    # Random suffix makes the service URL unguessable (URL-as-secret).
    service_id = f"safe-colab-{secrets.token_hex(16)}"

    svc_info = await server.register_service({
        "id": service_id,
        "name": "Safe Colab Sandbox",
        "type": "code-interpreter",
        "description": "Sandboxed Python environment for safe remote code execution",
        "config": {
            "visibility": "unlisted",
            "require_context": False,
            "run_in_executor": True,
        },
        "run_code": run_code,
        "execute_command": execute_command,
        "upload_file": upload_file,
        "list_shared_files": list_shared_files,
        "get_docs": get_docs,
    })

    actual_id = svc_info.get("id", service_id) if isinstance(svc_info, dict) else service_id
    service_url = _build_service_url(server_url, actual_id)

    instructions = _build_agent_instructions(
        service_url=service_url,
        data_dir=data_dir,
        work_dir=work_dir,
    )

    print(f"[hypha] Service registered: {actual_id}")
    print(f"[hypha] Service URL: {service_url}")
    return server, svc_info, instructions, service_url
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.4
2
+ Name: safe-colab-cli
3
+ Version: 0.1.0
4
+ Summary: CLI tool for safe collaboration - sandboxed Jupyter kernel with remote Hypha service access
5
+ Author: Amun AI AB
6
+ License: MIT
7
+ Keywords: safe-colab,sandbox,jupyter,hypha,remote-execution
8
+ Requires-Python: >=3.9
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: hypha-rpc>=0.20.0
11
+ Requires-Dist: jupyter_client>=8.0
12
+ Requires-Dist: ipykernel>=6.0
13
+ Requires-Dist: click>=8.0
14
+ Requires-Dist: python-dotenv>=1.0
15
+ Requires-Dist: httpx>=0.24.0
16
+ Provides-Extra: sandbox
17
+ Requires-Dist: nono-py>=0.1.0; extra == "sandbox"
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest>=7.0; extra == "dev"
20
+ Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
@@ -0,0 +1,16 @@
1
+ pyproject.toml
2
+ safe_colab_cli/__init__.py
3
+ safe_colab_cli/__main__.py
4
+ safe_colab_cli/artifacts.py
5
+ safe_colab_cli/cli.py
6
+ safe_colab_cli/kernel.py
7
+ safe_colab_cli/sandbox.py
8
+ safe_colab_cli/service.py
9
+ safe_colab_cli.egg-info/PKG-INFO
10
+ safe_colab_cli.egg-info/SOURCES.txt
11
+ safe_colab_cli.egg-info/dependency_links.txt
12
+ safe_colab_cli.egg-info/entry_points.txt
13
+ safe_colab_cli.egg-info/requires.txt
14
+ safe_colab_cli.egg-info/top_level.txt
15
+ tests/test_e2e.py
16
+ tests/test_kernel.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ safe-colab = safe_colab_cli.cli:main
@@ -0,0 +1,13 @@
1
+ hypha-rpc>=0.20.0
2
+ jupyter_client>=8.0
3
+ ipykernel>=6.0
4
+ click>=8.0
5
+ python-dotenv>=1.0
6
+ httpx>=0.24.0
7
+
8
+ [dev]
9
+ pytest>=7.0
10
+ pytest-asyncio>=0.21
11
+
12
+ [sandbox]
13
+ nono-py>=0.1.0
@@ -0,0 +1 @@
1
+ safe_colab_cli
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,143 @@
1
+ """End-to-end test: kernel + Hypha service + artifacts + remote code execution."""
2
+
3
+ import os
4
+ import pytest
5
+ from dotenv import load_dotenv
6
+
7
+ from safe_colab_cli.kernel import SandboxKernel
8
+ from safe_colab_cli.service import register_service
9
+
10
# Load .env for credentials
# NOTE(review): override=True means .env values win over the inherited
# environment — confirm that is intended for CI runs.
load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)

# Connection settings for the Hypha server; env vars take precedence
# over the .env file only where .env does not define them (see above).
SERVER_URL = os.environ.get("HYPHA_SERVER_URL", "https://hypha.aicell.io")
WORKSPACE = os.environ.get("HYPHA_WORKSPACE", "safe-colab")
# No default on purpose: tests below are skipped when TOKEN is unset.
TOKEN = os.environ.get("HYPHA_TOKEN")
16
+
17
+
18
@pytest.fixture
async def kernel():
    """Yield a started SandboxKernel and shut it down on teardown.

    NOTE(review): a plain ``@pytest.fixture`` on an async generator requires
    pytest-asyncio in "auto" mode — confirm the project's asyncio_mode config.
    """
    sandbox = SandboxKernel()
    await sandbox.start()
    yield sandbox
    await sandbox.stop()
24
+
25
+
26
@pytest.mark.asyncio
@pytest.mark.skipif(not TOKEN, reason="HYPHA_TOKEN not set")
async def test_e2e_full_workflow(kernel, tmp_path):
    """Full end-to-end: register, run code, shell commands, upload files, list files.

    Talks to the live Hypha server configured via HYPHA_SERVER_URL /
    HYPHA_WORKSPACE / HYPHA_TOKEN, so it is skipped when no token is set.
    The numbered sub-steps are order-dependent: later steps reuse kernel
    state and files produced by earlier ones.
    """
    # Per-test sandbox directories: data_dir holds the input fixture,
    # work_dir receives results that are later uploaded.
    work_dir = str(tmp_path / "workspace")
    os.makedirs(work_dir, exist_ok=True)
    data_dir = str(tmp_path / "data")
    os.makedirs(data_dir, exist_ok=True)

    # Create test data (values 42 + 17 = 59, asserted in Test 7)
    with open(os.path.join(data_dir, "test.csv"), "w") as f:
        f.write("name,value\nAlice,42\nBob,17\n")

    # Register service
    server, svc_info, instructions, service_url = await register_service(
        server_url=SERVER_URL,
        workspace=WORKSPACE,
        token=TOKEN,
        kernel=kernel,
        data_dir=data_dir,
        work_dir=work_dir,
    )

    # The generated agent instructions must mention every exposed endpoint.
    assert svc_info is not None
    assert service_url.startswith("https://")
    assert "safe-colab-" in svc_info["id"]  # Random unlisted ID
    assert "run_code" in instructions
    assert "execute_command" in instructions
    assert "upload_file" in instructions
    assert "curl" in instructions
    print(f"\nService URL: {service_url}")

    # Connect as remote agent — a second, independent connection, so the
    # calls below exercise the same path a remote consumer would use.
    from hypha_rpc import connect_to_server as connect
    remote_server = await connect({
        "server_url": SERVER_URL,
        "workspace": WORKSPACE,
        "token": TOKEN,
    })
    svc = await remote_server.get_service(svc_info["id"])

    # Test 1: run_code - simple print (stdout round-trips)
    result = await svc.run_code("print('Hello from remote!')")
    assert result["stdout"].strip() == "Hello from remote!"
    assert result["error"] is None
    print("Test 1 passed: run_code basic")

    # Test 2: run_code - expression result (repr of last expression)
    result = await svc.run_code("2 ** 10")
    assert result["result"] == "1024"
    print("Test 2 passed: run_code expression")

    # Test 3: run_code - error handling (exception name surfaced, not raised)
    result = await svc.run_code("raise ValueError('test')")
    assert result["error"]["ename"] == "ValueError"
    print("Test 3 passed: run_code error")

    # Test 4: run_code - state persistence across calls (Jupyter-cell style)
    await svc.run_code("my_var = 'persistent_value'")
    result = await svc.run_code("print(my_var)")
    assert result["stdout"].strip() == "persistent_value"
    print("Test 4 passed: state persistence")

    # Test 5: execute_command - ls (shell access sees the data dir)
    result = await svc.execute_command(f"ls {data_dir}")
    assert "test.csv" in result["stdout"]
    assert result["returncode"] == 0
    print("Test 5 passed: execute_command ls")

    # Test 6: execute_command - pip
    # NOTE(review): despite the label, this runs `python -c`, not pip.
    result = await svc.execute_command("python -c 'import json; print(json.dumps({\"ok\": True}))'")
    assert result["returncode"] == 0
    print("Test 6 passed: execute_command python")

    # Test 7: run_code - read CSV, write output
    # Outer f-string interpolates {data_dir}/{work_dir} here; the doubled
    # braces {{total}}/{{len(rows)}} stay literal so they format inside the
    # remote kernel instead.
    code = f"""
import csv
with open("{data_dir}/test.csv") as f:
    reader = csv.DictReader(f)
    rows = list(reader)
total = sum(int(r["value"]) for r in rows)
print(f"Total: {{total}}")

# Write results
with open("{work_dir}/results.txt", "w") as f:
    f.write(f"Total: {{total}}\\nRows: {{len(rows)}}\\n")
print("Results saved")
"""
    result = await svc.run_code(code)
    assert "Total: 59" in result["stdout"]  # 42 + 17 from the fixture CSV
    assert "Results saved" in result["stdout"]
    print("Test 7 passed: CSV read + file write")

    # Test 8: upload_file — file written in Test 7 becomes a shareable URL
    result = await svc.upload_file(f"{work_dir}/results.txt")
    assert "url" in result
    assert result["url"].startswith("https://")
    print(f"Test 8 passed: upload_file → {result['url'][:80]}...")

    # Test 9: list_shared_files (no count asserted — contents vary per run)
    files = await svc.list_shared_files()
    # Should have at least the results.txt and audit_log.jsonl
    print(f"Test 9 passed: list_shared_files → {len(files)} files")

    # Test 10: get_docs — documentation mentions every endpoint
    docs = await svc.get_docs()
    assert "run_code" in docs
    assert "execute_command" in docs
    assert "upload_file" in docs
    print(f"Test 10 passed: get_docs → {len(docs)} chars")

    # Test 11: upload_file security - reject outside work_dir
    result = await svc.upload_file("/etc/passwd")
    assert "error" in result
    assert "Access denied" in result["error"]
    print("Test 11 passed: upload_file security check")

    print("\nAll e2e tests passed!")
@@ -0,0 +1,78 @@
1
+ """Test the Jupyter kernel manager."""
2
+
3
+ import pytest
4
+ from safe_colab_cli.kernel import SandboxKernel
5
+
6
+
7
@pytest.fixture
async def kernel():
    """Provide a running SandboxKernel, stopped again after the test."""
    instance = SandboxKernel()
    await instance.start()
    yield instance
    await instance.stop()
13
+
14
+
15
@pytest.mark.asyncio
async def test_kernel_start_stop():
    """The kernel lifecycle (start → stop) completes without error."""
    sandbox = SandboxKernel()
    await sandbox.start()
    # After start() both the kernel manager and the client must exist.
    # NOTE(review): reaches into private attributes of SandboxKernel.
    assert sandbox._km is not None
    assert sandbox._kc is not None
    await sandbox.stop()
23
+
24
+
25
@pytest.mark.asyncio
async def test_kernel_execute_simple(kernel):
    """A print statement lands in stdout and reports no error."""
    outcome = await kernel.execute("print('hello world')")
    printed = outcome["stdout"].strip()
    assert printed == "hello world"
    assert outcome["error"] is None
31
+
32
+
33
@pytest.mark.asyncio
async def test_kernel_execute_result(kernel):
    """The value of a bare expression is captured under "result"."""
    outcome = await kernel.execute("2 + 3")
    assert outcome["result"] == "5"
    assert outcome["error"] is None
39
+
40
+
41
@pytest.mark.asyncio
async def test_kernel_execute_error(kernel):
    """An exception in executed code is reported, not raised."""
    outcome = await kernel.execute("1/0")
    failure = outcome["error"]
    assert failure is not None
    assert failure["ename"] == "ZeroDivisionError"
47
+
48
+
49
@pytest.mark.asyncio
async def test_kernel_execute_multiline(kernel):
    """Multi-statement snippets execute as a single cell."""
    snippet = """
import os
cwd = os.getcwd()
print(f"cwd: {cwd}")
"""
    outcome = await kernel.execute(snippet)
    assert "cwd:" in outcome["stdout"]
    assert outcome["error"] is None
60
+
61
+
62
@pytest.mark.asyncio
async def test_kernel_execute_stderr(kernel):
    """Text written to sys.stderr is captured separately from stdout."""
    snippet = """
import sys
print("error msg", file=sys.stderr)
"""
    outcome = await kernel.execute(snippet)
    assert "error msg" in outcome["stderr"]
71
+
72
+
73
@pytest.mark.asyncio
async def test_kernel_state_persistence(kernel):
    """A variable assigned in one call is still visible in the next."""
    await kernel.execute("x = 42")
    followup = await kernel.execute("print(x)")
    assert followup["stdout"].strip() == "42"