mcp_colab_gpu-0.2.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_colab_gpu-0.2.0/.github/workflows/publish.yml +31 -0
- mcp_colab_gpu-0.2.0/.gitignore +7 -0
- mcp_colab_gpu-0.2.0/LICENSE +22 -0
- mcp_colab_gpu-0.2.0/PKG-INFO +21 -0
- mcp_colab_gpu-0.2.0/README.md +248 -0
- mcp_colab_gpu-0.2.0/mcp_colab_gpu/__init__.py +1 -0
- mcp_colab_gpu-0.2.0/mcp_colab_gpu/colab_runtime.py +448 -0
- mcp_colab_gpu-0.2.0/mcp_colab_gpu/server.py +295 -0
- mcp_colab_gpu-0.2.0/pyproject.toml +37 -0

mcp_colab_gpu-0.2.0/.github/workflows/publish.yml
@@ -0,0 +1,31 @@
name: Publish to PyPI

on:
  push:
    tags:
      - "v*"

permissions:
  contents: read

jobs:
  publish:
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      id-token: write
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install build tools
        run: pip install build

      - name: Build package
        run: python -m build

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
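
Note: with `id-token: write` and no stored API token, `pypa/gh-action-pypi-publish` authenticates via PyPI trusted publishing (OIDC) through the `pypi` environment. Assuming trusted publishing is configured for this repository on PyPI, a release is cut by pushing a tag matching `v*`:

```bash
git tag v0.2.0
git push origin v0.2.0
```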

mcp_colab_gpu-0.2.0/LICENSE
@@ -0,0 +1,22 @@
MIT License

Copyright (c) 2026 Paritosh Dwivedi (original mcp-server-colab-exec)
Copyright (c) 2026 Masaya Hirano (mcp-colab-gpu — extended fork)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

mcp_colab_gpu-0.2.0/PKG-INFO
@@ -0,0 +1,21 @@
Metadata-Version: 2.4
Name: mcp-colab-gpu
Version: 0.2.0
Summary: MCP server for executing Python on Google Colab GPU/TPU runtimes — supports all Colab Pro accelerators (T4/L4/A100/H100/G4) with high-memory option and security hardening. Fork of mcp-server-colab-exec by Paritosh Dwivedi.
Project-URL: Homepage, https://github.com/mio-github/mcp-colab-gpu
Project-URL: Repository, https://github.com/mio-github/mcp-colab-gpu
Project-URL: Original Project, https://github.com/pdwi2020/mcp-server-colab-exec
Author: Masaya Hirano
Maintainer: Masaya Hirano
License-Expression: MIT
License-File: LICENSE
Keywords: colab,colab-pro,cuda,gpu,machine-learning,mcp,tpu
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.10
Requires-Dist: google-auth-oauthlib>=1.0
Requires-Dist: google-auth>=2.0
Requires-Dist: mcp[cli]>=1.6.0
Requires-Dist: requests>=2.28
Requires-Dist: websocket-client>=1.5

mcp_colab_gpu-0.2.0/README.md
@@ -0,0 +1,248 @@
# mcp-colab-gpu

Extended MCP server for Google Colab GPU/TPU runtimes

> Based on [mcp-server-colab-exec](https://github.com/pdwi2020/mcp-server-colab-exec) by [Paritosh Dwivedi](https://github.com/pdwi2020) (MIT License). Thank you for the original idea and implementation!

MCP server that allocates Google Colab GPU/TPU runtimes and executes Python code on them. Lets any MCP-compatible AI assistant -- Claude Code, Claude Desktop, Gemini CLI, Cline, and others -- run GPU/TPU-accelerated code (CUDA, PyTorch, TensorFlow, JAX) without local GPU hardware.

## What's different from the original

| Feature | mcp-server-colab-exec | mcp-colab-gpu |
|---|---|---|
| GPU support | T4, L4 | **T4, L4, A100, H100, G4** |
| TPU support | -- | **V5E1, V6E1** |
| High-memory runtime | -- | **Supported** |
| Input validation | -- | **Accelerator + timeout validation** |
| Path traversal protection | -- | **.py-only + resolved symlinks** |
| Zip slip protection | -- | **Member path validation** |
| Token file permissions | Default | **0600 (owner-only)** |
| Token refresh error logging | Silent | **Logged with re-auth fallback** |

## Supported accelerators

| Accelerator | VRAM / Memory | Tier |
|---|---|---|
| `T4` | 16 GB | Free |
| `L4` | 24 GB | Colab Pro |
| `A100` | 40 GB | Colab Pro / Pro+ |
| `H100` | 80 GB | Colab Pro+ |
| `G4` | 95 GB | Colab Pro+ |
| `V5E1` | TPU v5e-1 | Colab Pro+ |
| `V6E1` | TPU v6e-1 | Colab Pro+ |

## Prerequisites

- Python 3.10+
- A Google account with access to [Google Colab](https://colab.research.google.com)
- On first run, a browser window opens for OAuth2 consent. The token is cached at `~/.config/colab-exec/token.json` for subsequent runs.

## Installation

### With uvx (recommended)

```bash
uvx mcp-colab-gpu
```

### With pip

```bash
pip install mcp-colab-gpu
```

### Claude Code configuration

Add to your project's `.mcp.json` or `~/.claude/.mcp.json`:

```json
{
  "mcpServers": {
    "colab-gpu": {
      "command": "uvx",
      "args": ["mcp-colab-gpu"]
    }
  }
}
```

Or via the CLI:

```bash
claude mcp add colab-gpu -- uvx mcp-colab-gpu
```

### Claude Desktop configuration

Add to `claude_desktop_config.json`:

```json
{
  "mcpServers": {
    "colab-gpu": {
      "command": "uvx",
      "args": ["mcp-colab-gpu"]
    }
  }
}
```

## Tools

### `colab_execute`

Execute inline Python code on a Colab GPU/TPU runtime.

| Parameter | Type | Default | Description |
|---|---|---|---|
| `code` | string | -- | Python code to execute (required) |
| `accelerator` | string | `"T4"` | GPU/TPU type: `T4`, `L4`, `A100`, `H100`, `G4`, `V5E1`, `V6E1` |
| `high_memory` | bool | `false` | Enable high-memory runtime (more RAM) |
| `timeout` | int | `300` | Max execution time in seconds (10--3600) |

Returns JSON with per-cell output, errors, and stderr.
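
A representative response (values illustrative; the shape matches what `server.py` serializes, and `colab_execute_file` below returns the same structure):

```json
{
  "cells": [
    {"cell_num": 0, "stdout": "True\nTesla T4", "status": "ok"}
  ],
  "errors": [],
  "stderr": "",
  "exit_code": 0
}
```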

### `colab_execute_file`

Execute a local `.py` file on a Colab GPU/TPU runtime.

| Parameter | Type | Default | Description |
|---|---|---|---|
| `file_path` | string | -- | Path to a local `.py` file (required) |
| `accelerator` | string | `"T4"` | GPU/TPU type: `T4`, `L4`, `A100`, `H100`, `G4`, `V5E1`, `V6E1` |
| `high_memory` | bool | `false` | Enable high-memory runtime (more RAM) |
| `timeout` | int | `300` | Max execution time in seconds (10--3600) |

Returns JSON with per-cell output, errors, and stderr.

### `colab_execute_notebook`

Execute code and collect all generated artifacts (images, CSVs, models, etc.).

| Parameter | Type | Default | Description |
|---|---|---|---|
| `code` | string | -- | Python code to execute (required) |
| `output_dir` | string | -- | Local directory for downloaded artifacts (required) |
| `accelerator` | string | `"T4"` | GPU/TPU type: `T4`, `L4`, `A100`, `H100`, `G4`, `V5E1`, `V6E1` |
| `high_memory` | bool | `false` | Enable high-memory runtime (more RAM) |
| `timeout` | int | `300` | Max execution time in seconds (10--3600) |

Artifacts are downloaded as a zip and extracted into `output_dir`.
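
The response adds artifact fields on top of the `colab_execute` shape; an illustrative sketch (file names are examples):

```json
{
  "cells": [
    {"cell_num": 0, "stdout": "Model saved!", "status": "ok"}
  ],
  "errors": [],
  "artifacts_zip": "./outputs/colab_artifacts.zip",
  "artifact_files": ["model.pt"],
  "stderr": "",
  "exit_code": 0
}
```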

## Examples

### Check GPU availability

```python
colab_execute(code="import torch; print(torch.cuda.is_available()); print(torch.cuda.get_device_name(0))")
```

### Matrix multiplication benchmark on A100

```python
colab_execute(
    code="""
import torch
import time

device = torch.device('cuda')
print(f"GPU: {torch.cuda.get_device_name(0)}")
print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

# Benchmark: large matrix multiplication
a = torch.randn(8192, 8192, device=device)
b = torch.randn(8192, 8192, device=device)

torch.cuda.synchronize()
start = time.time()
c = torch.mm(a, b)
torch.cuda.synchronize()
elapsed = time.time() - start

tflops = 2 * 8192**3 / elapsed / 1e12
print(f"8192x8192 matmul: {elapsed:.3f}s ({tflops:.1f} TFLOPS)")
""",
    accelerator="A100",
    high_memory=True,
)
```

### LLM inference on H100

```python
colab_execute(
    code="""
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True
)

prompt = "Explain quantum computing in one paragraph:"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=200)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
""",
    accelerator="H100",
    timeout=600,
)
```

### Train and download model weights

```python
colab_execute_notebook(
    code="""
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10))
model = model.cuda()
# ... training code ...
torch.save(model.state_dict(), '/tmp/model.pt')
print("Model saved!")
""",
    output_dir="./outputs",
    accelerator="T4",
)
```

## Authentication

On first use, the server opens a browser window for Google OAuth2 consent. The access token and refresh token are cached at `~/.config/colab-exec/token.json`. Subsequent runs use the cached token and refresh it automatically.

The OAuth2 client credentials are the same ones used by the official Google Colab VS Code extension (`google.colab@0.3.0`). They are intentionally public.

## Security improvements

This fork includes the following security hardening over the original:

- **Path validation in `colab_execute_file`**: Only `.py` files are accepted. Paths are resolved through `pathlib.Path.resolve()` to prevent symlink-based traversal attacks.
- **Zip slip protection in `colab_execute_notebook`**: Every member in a downloaded artifact zip is validated to ensure its resolved path stays within the target `output_dir`, preventing directory traversal via crafted zip entries (see the excerpt after this list).
- **Token file permissions**: The OAuth token cache file (`~/.config/colab-exec/token.json`) is created with `0o600` permissions (owner read/write only) using `os.open` instead of plain `open`.
- **Input validation**: The `accelerator` parameter is validated against the known set of supported accelerators, and `timeout` is bounded to 10--3600 seconds, before any network calls are made.
- **Token refresh error logging**: When automatic token refresh fails, the error is logged to stderr with a warning message before falling back to re-authentication, instead of silently discarding the error.
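
The zip slip check, excerpted from `mcp_colab_gpu/server.py` in this package: every member's resolved destination is verified against the resolved output directory before anything is extracted.

```python
def _safe_extract_zip(zip_path: str, output_dir: str) -> list[str]:
    """Extract zip with zip-slip protection. Returns list of extracted filenames."""
    resolved_output = pathlib.Path(output_dir).resolve()
    with zipfile.ZipFile(zip_path, "r") as zf:
        for member in zf.infolist():
            # An entry like "../../etc/cron.d/job" would resolve outside
            # output_dir and is rejected before extraction begins.
            member_path = (resolved_output / member.filename).resolve()
            if not str(member_path).startswith(str(resolved_output)):
                raise ValueError(f"Zip slip detected: {member.filename}")
        artifact_files = zf.namelist()
        zf.extractall(output_dir)
    return artifact_files
```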

## Troubleshooting

**"GPU quota exceeded"** -- Colab has usage limits. Wait and retry, or use a different Google account.

**"Timed out creating kernel session"** -- The runtime took too long to start. Retry -- Colab sometimes has delays during peak usage.

**"Authentication failed"** -- Delete `~/.config/colab-exec/token.json` and re-authenticate.

**OAuth browser window doesn't open** -- Ensure you're running in an environment with a browser. For headless servers, authenticate on a machine with a browser first and copy the token file, as sketched below.
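
A minimal sketch for seeding a headless server, assuming SSH access to a hypothetical host `gpu-box`:

```bash
# Authenticate once on a machine with a browser, then copy the cached token.
ssh gpu-box 'mkdir -p ~/.config/colab-exec'
scp ~/.config/colab-exec/token.json gpu-box:~/.config/colab-exec/token.json
# Match the server's owner-only permissions.
ssh gpu-box 'chmod 600 ~/.config/colab-exec/token.json'
```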

## About the maintainer

Masaya Hirano -- CEO of [Mio System Co., Ltd.](https://miosystem.co.jp), CRO of [TrustedAI Corporation](https://trustedai.co.jp)

## License

[MIT](LICENSE)

Original work: Copyright (c) 2026 Paritosh Dwivedi
Extended fork: Copyright (c) 2026 Masaya Hirano

mcp_colab_gpu-0.2.0/mcp_colab_gpu/__init__.py
@@ -0,0 +1 @@
__version__ = "0.2.0"

mcp_colab_gpu-0.2.0/mcp_colab_gpu/colab_runtime.py
@@ -0,0 +1,448 @@
"""Colab GPU runtime engine --- auth, allocate, execute, cleanup.

Provides a clean importable API for allocating Google Colab GPU runtimes
and executing Python code on them via the Jupyter kernel WebSocket protocol.

Original: mcp-server-colab-exec v0.1.0 by Paritosh Dwivedi (MIT License)
Extended: mcp-colab-gpu by Masaya Hirano --- all Colab Pro GPUs, high-memory, security fixes.
"""

import io
import json
import os
import re
import sys
import threading
import time
import uuid

import requests
import websocket
from google.auth.transport.requests import Request as GoogleRequest
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow

os.environ.setdefault("OAUTHLIB_RELAX_TOKEN_SCOPE", "1")

COLAB_API = "https://colab.research.google.com"
SCOPES = [
    "https://www.googleapis.com/auth/colaboratory",
    "profile",
    "email",
]

# OAuth2 client credentials from the Colab VS Code extension (google.colab@0.3.0)
# These are intentionally public (the extension names them "ClientNotSoSecret")
CLIENT_CONFIG = {
    "installed": {
        "client_id": "1014160490159-cvot3bea7tgkp72a4m29h20d9ddo6bne.apps.googleusercontent.com",
        "client_secret": "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",
        "auth_uri": "https://accounts.google.com/o/oauth2/v2/auth",
        "token_uri": "https://oauth2.googleapis.com/token",
        "redirect_uris": ["http://localhost"],
    }
}

TOKEN_CACHE_DIR = os.path.expanduser("~/.config/colab-exec")
TOKEN_CACHE_PATH = os.path.join(TOKEN_CACHE_DIR, "token.json")

HIGHMEM_REQUIRED_ACCELERATORS = {"V5E1", "V6E1"}
VALID_ACCELERATORS = {"T4", "L4", "A100", "H100", "G4", "V5E1", "V6E1"}
MAX_TIMEOUT = 3600
MIN_TIMEOUT = 10
EPHEMERAL_AUTH_TYPES = {"dfs_ephemeral", "auth_user_ephemeral"}


def validate_params(accelerator: str, timeout: int) -> None:
    """Validate accelerator and timeout parameters."""
    if accelerator not in VALID_ACCELERATORS:
        raise ValueError(
            f"Invalid accelerator '{accelerator}'. "
            f"Must be one of: {sorted(VALID_ACCELERATORS)}"
        )
    if not (MIN_TIMEOUT <= timeout <= MAX_TIMEOUT):
        raise ValueError(
            f"timeout must be between {MIN_TIMEOUT} and {MAX_TIMEOUT}, got {timeout}"
        )


def get_credentials() -> Credentials:
    """Load cached credentials or run the browser OAuth2 flow."""
    creds = None

    if os.path.exists(TOKEN_CACHE_PATH):
        creds = Credentials.from_authorized_user_file(TOKEN_CACHE_PATH, SCOPES)

    if creds and creds.expired and creds.refresh_token:
        try:
            creds.refresh(GoogleRequest())
            _save_credentials(creds)
        except Exception as e:
            print(f"[colab-gpu] Warning: token refresh failed ({e}), re-authenticating...", file=sys.stderr)
            creds = None

    if not creds or not creds.valid:
        flow = InstalledAppFlow.from_client_config(CLIENT_CONFIG, SCOPES)
        creds = flow.run_local_server(
            port=0,
            access_type="offline",
            prompt="consent",
            success_message="Authentication successful! You can close this tab.",
        )
        _save_credentials(creds)

    return creds


def _save_credentials(creds: Credentials):
    os.makedirs(TOKEN_CACHE_DIR, exist_ok=True)
    fd = os.open(TOKEN_CACHE_PATH, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        f.write(creds.to_json())


def _strip_xssi(text: str) -> dict:
    """Strip the )]}' XSSI prefix and parse JSON."""
    if text.startswith(")]}'"):
        text = text[text.index("\n") + 1:]
    return json.loads(text)


def _colab_headers(token: str, extra: dict = None) -> dict:
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
        "X-Colab-Client-Agent": "vscode",
    }
    if extra:
        headers.update(extra)
    return headers


def generate_notebook_hash() -> str:
    """Generate a Colab-valid notebook hash."""
    raw_uuid = str(uuid.uuid4())
    return raw_uuid.replace("-", "_") + "." * (44 - len(raw_uuid))


def _build_assign_params(
    nbh: str,
    accelerator: str,
    high_memory: bool = False,
) -> dict:
    params = {
        "nbh": nbh,
        "authuser": "0",
    }
    if accelerator:
        params["variant"] = "GPU"
        params["accelerator"] = accelerator
    if high_memory or accelerator in HIGHMEM_REQUIRED_ACCELERATORS:
        params["shape"] = "hm"
    return params


def _parse_assignment(assignment: dict) -> dict:
    endpoint = assignment.get("endpoint")
    proxy_info = assignment.get("runtimeProxyInfo", {})
    proxy_url = (proxy_info.get("url") or "").rstrip("/")
    proxy_token = proxy_info.get("token")
    return {
        "endpoint": endpoint,
        "proxy_url": proxy_url,
        "proxy_token": proxy_token,
    }


def allocate_runtime(
    token: str,
    accelerator: str = "T4",
    high_memory: bool = False,
) -> dict:
    """Allocate a Colab GPU runtime. Returns dict with endpoint + proxy info."""
    nbh = generate_notebook_hash()
    params = _build_assign_params(nbh, accelerator, high_memory)
    headers = _colab_headers(token)

    hm_label = " (high-memory)" if high_memory else ""
    print(f"[colab-gpu] Requesting {accelerator}{hm_label} runtime...", file=sys.stderr)
    r = requests.get(f"{COLAB_API}/tun/m/assign", params=params, headers=headers, timeout=30)
    r.raise_for_status()
    data = _strip_xssi(r.text)

    parsed = _parse_assignment(data)
    if parsed["endpoint"] and parsed["proxy_url"] and parsed["proxy_token"]:
        print(f"[colab-gpu] Reusing existing runtime: endpoint={parsed['endpoint']}", file=sys.stderr)
        return {
            **parsed,
            "xsrf_token": None,
            "nbh": nbh,
            "reused": True,
        }

    xsrf_token = data.get("token") or data.get("xsrfToken")
    if not xsrf_token:
        raise RuntimeError("No XSRF token in Colab assign response")

    post_headers = _colab_headers(token, {"X-Goog-Colab-Token": xsrf_token})
    r = requests.post(
        f"{COLAB_API}/tun/m/assign",
        params=params,
        headers=post_headers,
        timeout=30,
    )
    r.raise_for_status()
    assignment = _strip_xssi(r.text)

    parsed = _parse_assignment(assignment)
    if not parsed["endpoint"] or not parsed["proxy_url"] or not parsed["proxy_token"]:
        raise RuntimeError("Incomplete assignment response from Colab")

    print(f"[colab-gpu] Runtime allocated: endpoint={parsed['endpoint']}", file=sys.stderr)
    return {
        **parsed,
        "xsrf_token": xsrf_token,
        "nbh": nbh,
        "reused": False,
    }


def unassign_runtime(token: str, endpoint: str) -> bool:
    """Release the runtime."""
    headers = _colab_headers(token)
    url = f"{COLAB_API}/tun/m/unassign/{endpoint}"
    params = {"authuser": "0"}
    try:
        r = requests.get(url, headers=headers, params=params, timeout=20)
        r.raise_for_status()
        data = _strip_xssi(r.text)
        xsrf = data.get("token", "")
        post_headers = _colab_headers(token, {"X-Goog-Colab-Token": xsrf})
        r = requests.post(url, headers=post_headers, params=params, timeout=20)
        r.raise_for_status()
        print(f"[colab-gpu] Runtime {endpoint} released.", file=sys.stderr)
        return True
    except Exception as e:
        print(f"[colab-gpu] Warning: failed to unassign runtime: {e}", file=sys.stderr)
        return False


def propagate_credentials(token: str, endpoint: str, auth_type: str, dry_run: bool) -> dict:
    """Call Colab credentials propagation API for ephemeral auth challenges."""
    url = f"{COLAB_API}/tun/m/credentials-propagation/{endpoint}"
    params = {
        "authuser": "0",
        "authtype": auth_type,
        "version": "2",
        "dryrun": str(bool(dry_run)).lower(),
        "propagate": "true",
        "record": "false",
    }
    headers = _colab_headers(token)
    r = requests.get(url, headers=headers, params=params, timeout=30)
    r.raise_for_status()
    data = _strip_xssi(r.text)
    xsrf = data.get("token") or data.get("xsrfToken")
    if not xsrf:
        raise RuntimeError("No XSRF token from credentials propagation")
    post_headers = _colab_headers(token, {"X-Goog-Colab-Token": xsrf})
    r = requests.post(url, headers=post_headers, params=params, timeout=30)
    r.raise_for_status()
    return _strip_xssi(r.text)


def start_keepalive(token: str, endpoint: str) -> threading.Event:
    """Start a background thread that pings keep-alive every 60s."""
    stop_event = threading.Event()

    def loop():
        headers = _colab_headers(token, {"X-Colab-Tunnel": "Google"})
        url = f"{COLAB_API}/tun/m/{endpoint}/keep-alive/"
        params = {"authuser": "0"}
        while not stop_event.is_set():
            try:
                requests.get(url, headers=headers, params=params, timeout=10)
            except Exception:
                pass
            stop_event.wait(60)

    t = threading.Thread(target=loop, daemon=True)
    t.start()
    return stop_event


def create_session(proxy_url: str, proxy_token: str, startup_timeout: int = 180) -> str:
    """Create a Jupyter session and return the kernel ID."""
    headers = {
        "X-Colab-Runtime-Proxy-Token": proxy_token,
        "X-Colab-Client-Agent": "vscode",
        "Content-Type": "application/json",
    }
    body = {
        "kernel": {"name": "python3"},
        "name": "colab-exec",
        "path": "colab-exec",
        "type": "notebook",
    }
    last_error = None
    deadline = time.time() + startup_timeout
    attempt = 0
    while time.time() < deadline:
        attempt += 1
        try:
            r = requests.post(f"{proxy_url}/api/sessions", headers=headers, json=body, timeout=30)
            r.raise_for_status()
            data = r.json()
            kernel_id = data["kernel"]["id"]
            print(f"[colab-gpu] Kernel ready: {kernel_id}", file=sys.stderr)
            return kernel_id
        except Exception as e:
            last_error = e
            remaining = int(deadline - time.time())
            if remaining <= 0:
                break
            print(f"[colab-gpu] Waiting for runtime readiness (attempt {attempt}, {remaining}s left)...", file=sys.stderr)
            time.sleep(3)
    raise RuntimeError(f"Timed out creating kernel session: {last_error}")


def _make_colab_input_reply(client_session_id: str, colab_msg_id, err: str = None) -> dict:
    value = {"type": "colab_reply", "colab_msg_id": colab_msg_id}
    if err:
        value["error"] = err
    return {
        "header": {
            "msg_id": uuid.uuid4().hex,
            "msg_type": "input_reply",
            "session": client_session_id,
            "date": time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime()),
            "username": "username",
            "version": "5.0",
        },
        "content": {"value": value},
        "channel": "stdin",
        "metadata": {},
        "parent_header": {},
    }


def execute_code(
    proxy_url: str,
    proxy_token: str,
    kernel_id: str,
    code: str,
    timeout: int = 300,
    access_token: str = None,
    endpoint: str = None,
) -> tuple[str, str, int]:
    """Execute code on a Colab kernel via WebSocket.

    Returns (stdout, stderr, exit_code).
    """
    stdout_buf = io.StringIO()
    stderr_buf = io.StringIO()

    execute_session_id = uuid.uuid4().hex
    ws_url = proxy_url.replace("https://", "wss://").replace("http://", "ws://")
    ws_url = f"{ws_url}/api/kernels/{kernel_id}/channels?session_id={execute_session_id}"

    ws = websocket.create_connection(
        ws_url,
        header=[f"X-Colab-Runtime-Proxy-Token: {proxy_token}", "X-Colab-Client-Agent: vscode"],
        timeout=timeout,
    )

    msg_id = uuid.uuid4().hex
    execute_msg = {
        "header": {
            "msg_id": msg_id, "msg_type": "execute_request",
            "username": "colab-exec", "session": execute_session_id, "version": "5.3",
        },
        "parent_header": {},
        "metadata": {},
        "content": {
            "code": code, "silent": False, "store_history": True,
            "user_expressions": {}, "allow_stdin": False, "stop_on_error": True,
        },
        "channel": "shell",
    }
    ws.send(json.dumps(execute_msg))

    had_error = False
    saw_idle = False
    deadline = time.time() + timeout

    while time.time() < deadline:
        try:
            raw = ws.recv()
        except websocket.WebSocketTimeoutException:
            continue
        if not raw:
            continue

        msg = json.loads(raw)
        msg_type = msg.get("msg_type") or msg.get("header", {}).get("msg_type", "")
        content = msg.get("content", {})

        if msg_type == "colab_request":
            metadata = msg.get("metadata", {})
            request_type = metadata.get("colab_request_type")
            colab_msg_id = metadata.get("colab_msg_id")
            auth_type = (content.get("request", {}).get("authType", "") if isinstance(content, dict) else "")
            auth_type = str(auth_type).lower()
            if request_type == "request_auth" and colab_msg_id is not None:
                error_text = None
                if not access_token or not endpoint:
                    error_text = "missing auth context for credentials propagation"
                elif auth_type not in EPHEMERAL_AUTH_TYPES:
                    error_text = f"unsupported auth type: {auth_type}"
                else:
                    try:
                        dry = propagate_credentials(access_token, endpoint, auth_type, dry_run=True)
                        if dry.get("success"):
                            propagate_credentials(access_token, endpoint, auth_type, dry_run=False)
                        elif dry.get("unauthorizedRedirectUri"):
                            error_text = f"{auth_type} requires interactive browser consent: {dry['unauthorizedRedirectUri']}"
                        else:
                            error_text = f"{auth_type} dry-run failed: {dry}"
                    except Exception as e:
                        error_text = f"{auth_type} propagation failed: {e}"
                reply = _make_colab_input_reply(execute_session_id, colab_msg_id, error_text)
                ws.send(json.dumps(reply))
                if error_text:
                    stderr_buf.write(f"[colab-gpu] Warning: {error_text}\n")
            continue

        parent_msg_id = msg.get("parent_header", {}).get("msg_id")
        if parent_msg_id != msg_id:
            continue

        if msg_type == "stream":
            text = content.get("text", "")
            if content.get("name", "stdout") == "stdout":
                stdout_buf.write(text)
            else:
                stderr_buf.write(text)
        elif msg_type == "execute_result":
            text = content.get("data", {}).get("text/plain", "")
            if text:
                stdout_buf.write(text + "\n")
        elif msg_type == "display_data":
            text = content.get("data", {}).get("text/plain", "")
            if text:
                stdout_buf.write(text + "\n")
        elif msg_type == "error":
            had_error = True
            ename = content.get("ename", "Error")
            evalue = content.get("evalue", "")
            stderr_buf.write(f"\n{ename}: {evalue}\n")
            for line in content.get("traceback", []):
                stderr_buf.write(re.sub(r"\x1b\[[0-9;]*m", "", line) + "\n")
        elif msg_type == "status":
            if content.get("execution_state") == "idle":
                saw_idle = True
                break

    ws.close()
    if not saw_idle:
        stderr_buf.write("[colab-gpu] ERROR: Timed out waiting for kernel execution to finish.\n")
        had_error = True

    return stdout_buf.getvalue(), stderr_buf.getvalue(), 1 if had_error else 0

mcp_colab_gpu-0.2.0/mcp_colab_gpu/server.py
@@ -0,0 +1,295 @@
"""MCP server for executing Python code on Google Colab GPU/TPU runtimes.

Exposes three tools via the FastMCP API:
- colab_execute: Run inline Python code on a Colab GPU/TPU
- colab_execute_file: Run a local .py file on a Colab GPU/TPU
- colab_execute_notebook: Run code and collect generated artifacts

Original: mcp-server-colab-exec v0.1.0 by Paritosh Dwivedi (MIT License)
Extended: mcp-colab-gpu by Masaya Hirano --- all Colab Pro GPUs, high-memory, security fixes.
"""

import base64
import json
import os
import pathlib
import re
import zipfile

from mcp.server.fastmcp import FastMCP

from .colab_runtime import (
    allocate_runtime,
    create_session,
    execute_code,
    get_credentials,
    start_keepalive,
    unassign_runtime,
    validate_params,
)

mcp = FastMCP("colab-gpu")

CELL_START = "===CELL_START_{n}==="
CELL_END = "===CELL_END_{n}==="
ARTIFACT_B64_START = "ARTIFACT_BASE64_START"
ARTIFACT_B64_END = "ARTIFACT_BASE64_END"


def _wrap_cells(code: str) -> tuple[str, int]:
    """Wrap code in cell-boundary markers so output can be parsed per-cell."""
    raw_cells = re.split(r"\n{2,}", code.strip())
    cells = [c.strip() for c in raw_cells if c.strip()]
    if not cells:
        cells = [code]
    wrapped_parts = []
    for i, cell in enumerate(cells):
        marker_start = CELL_START.format(n=i)
        marker_end = CELL_END.format(n=i)
        wrapped_parts.append(
            f'print("{marker_start}", flush=True)\n'
            f"{cell}\n"
            f'print("{marker_end}", flush=True)'
        )
    return "\n\n".join(wrapped_parts), len(cells)


def _parse_cell_output(stdout: str, num_cells: int) -> list[dict]:
    cells = []
    for i in range(num_cells):
        start_marker = CELL_START.format(n=i)
        end_marker = CELL_END.format(n=i)
        pattern = re.escape(start_marker) + r"\n?(.*?)\n?" + re.escape(end_marker)
        match = re.search(pattern, stdout, re.DOTALL)
        cell_stdout = match.group(1).strip() if match else ""
        cells.append({"cell_num": i, "stdout": cell_stdout, "status": "ok" if match else "no_output"})
    return cells


def _extract_artifact_b64(stdout: str) -> str | None:
    pattern = re.escape(ARTIFACT_B64_START) + r"\n(.*?)\n" + re.escape(ARTIFACT_B64_END)
    match = re.search(pattern, stdout, re.DOTALL)
    return match.group(1).strip() if match else None


def _run_on_colab(code: str, accelerator: str, high_memory: bool, timeout: int) -> tuple[str, str, int]:
    validate_params(accelerator, timeout)
    creds = get_credentials()
    access_token = creds.token
    assignment = allocate_runtime(access_token, accelerator, high_memory)
    stop_event = start_keepalive(access_token, assignment["endpoint"])
    try:
        kernel_id = create_session(assignment["proxy_url"], assignment["proxy_token"])
        stdout, stderr, exit_code = execute_code(
            assignment["proxy_url"], assignment["proxy_token"], kernel_id, code,
            timeout=timeout, access_token=access_token, endpoint=assignment["endpoint"],
        )
    finally:
        stop_event.set()
        unassign_runtime(access_token, assignment["endpoint"])
    return stdout, stderr, exit_code


def _validate_file_path(raw: str) -> pathlib.Path:
    """Validate file path: must exist, must be .py."""
    resolved = pathlib.Path(os.path.expanduser(raw)).resolve()
    if resolved.suffix != ".py":
        raise ValueError(f"Only .py files are allowed, got: '{resolved.suffix}'")
    if not resolved.is_file():
        raise FileNotFoundError(f"File not found: {resolved}")
    return resolved


def _safe_extract_zip(zip_path: str, output_dir: str) -> list[str]:
    """Extract zip with zip-slip protection. Returns list of extracted filenames."""
    resolved_output = pathlib.Path(output_dir).resolve()
    with zipfile.ZipFile(zip_path, "r") as zf:
        for member in zf.infolist():
            member_path = (resolved_output / member.filename).resolve()
            if not str(member_path).startswith(str(resolved_output)):
                raise ValueError(f"Zip slip detected: {member.filename}")
        artifact_files = zf.namelist()
        zf.extractall(output_dir)
    return artifact_files


@mcp.tool(annotations={"readOnlyHint": False, "destructiveHint": False})
def colab_execute(
    code: str,
    accelerator: str = "T4",
    high_memory: bool = False,
    timeout: int = 300,
) -> str:
    """Execute Python code on a Google Colab GPU/TPU runtime.

    Allocates a GPU or TPU, runs the code, and returns structured JSON
    with per-cell output, errors, and stderr.

    Args:
        code: Python code to execute on the Colab runtime.
        accelerator: Hardware accelerator type. Default: "T4".
            GPU types:
                "T4" - NVIDIA Tesla T4 (16 GB, free-tier)
                "L4" - NVIDIA L4 (24 GB, Colab Pro)
                "A100" - NVIDIA A100 (40 GB, Colab Pro/Pro+)
                "H100" - NVIDIA H100 (80 GB, Colab Pro+)
                "G4" - NVIDIA G4 (Colab Pro+)
            TPU types:
                "V5E1" - TPU v5e-1 (Colab Pro+)
                "V6E1" - TPU v6e-1 (Colab Pro+)
        high_memory: Enable high-memory runtime (more RAM). Default: False.
        timeout: Max execution time in seconds. Default: 300.
    """
    wrapped, num_cells = _wrap_cells(code)
    stdout, stderr, rc = _run_on_colab(wrapped, accelerator, high_memory, timeout)
    cells = _parse_cell_output(stdout, num_cells)
    errors = [c for c in cells if c["status"] != "ok"] if rc != 0 else []
    return json.dumps({"cells": cells, "errors": errors, "stderr": stderr, "exit_code": rc}, indent=2)


@mcp.tool(annotations={"readOnlyHint": False, "destructiveHint": False})
def colab_execute_file(
    file_path: str,
    accelerator: str = "T4",
    high_memory: bool = False,
    timeout: int = 300,
) -> str:
    """Execute a local Python file on a Google Colab GPU/TPU runtime.

    Reads the file contents and sends them for execution on a Colab runtime.
    Only .py files are allowed for security.

    Args:
        file_path: Path to a local .py file to execute on Colab.
        accelerator: Hardware accelerator type. Default: "T4".
            GPU types:
                "T4" - NVIDIA Tesla T4 (16 GB, free-tier)
                "L4" - NVIDIA L4 (24 GB, Colab Pro)
                "A100" - NVIDIA A100 (40 GB, Colab Pro/Pro+)
                "H100" - NVIDIA H100 (80 GB, Colab Pro+)
                "G4" - NVIDIA G4 (Colab Pro+)
            TPU types:
                "V5E1" - TPU v5e-1 (Colab Pro+)
                "V6E1" - TPU v6e-1 (Colab Pro+)
        high_memory: Enable high-memory runtime (more RAM). Default: False.
        timeout: Max execution time in seconds. Default: 300.
    """
    try:
        resolved = _validate_file_path(file_path)
    except (ValueError, FileNotFoundError) as e:
        return json.dumps({"error": str(e)})

    code = resolved.read_text()
    wrapped, num_cells = _wrap_cells(code)
    stdout, stderr, rc = _run_on_colab(wrapped, accelerator, high_memory, timeout)
    cells = _parse_cell_output(stdout, num_cells)
    errors = [c for c in cells if c["status"] != "ok"] if rc != 0 else []
    return json.dumps({"cells": cells, "errors": errors, "stderr": stderr, "exit_code": rc}, indent=2)


@mcp.tool(annotations={"readOnlyHint": False, "destructiveHint": False})
def colab_execute_notebook(
    code: str,
    output_dir: str,
    accelerator: str = "T4",
    high_memory: bool = False,
    timeout: int = 300,
) -> str:
    """Execute Python code on Colab GPU/TPU and collect generated artifacts.

    Runs the code, then scans the runtime for output files (images, CSVs,
    models, etc.), zips them, and downloads to a local directory.

    Args:
        code: Python code to execute on the Colab runtime.
        output_dir: Local directory to save the artifacts zip and extracted files.
        accelerator: Hardware accelerator type. Default: "T4".
            GPU types:
                "T4" - NVIDIA Tesla T4 (16 GB, free-tier)
                "L4" - NVIDIA L4 (24 GB, Colab Pro)
                "A100" - NVIDIA A100 (40 GB, Colab Pro/Pro+)
                "H100" - NVIDIA H100 (80 GB, Colab Pro+)
                "G4" - NVIDIA G4 (Colab Pro+)
            TPU types:
                "V5E1" - TPU v5e-1 (Colab Pro+)
                "V6E1" - TPU v6e-1 (Colab Pro+)
        high_memory: Enable high-memory runtime (more RAM). Default: False.
        timeout: Max execution time in seconds. Default: 300.
    """
    output_dir = os.path.expanduser(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    artifact_code = '''

# --- colab-exec artifact collection ---
import os, zipfile, base64, glob

_artifact_dir = "/tmp/colab_artifacts"
os.makedirs(_artifact_dir, exist_ok=True)

_scan_dirs = ["/tmp", os.getcwd(), "/content"]
_skip_prefixes = ["/tmp/colab_artifacts", "/tmp/."]
_collected = []
for _sd in _scan_dirs:
    if not os.path.isdir(_sd):
        continue
    for _root, _dirs, _files in os.walk(_sd):
        _dirs[:] = [d for d in _dirs if not d.startswith('.')]
        if any(_root.startswith(p) for p in _skip_prefixes):
            continue
        for _f in _files:
            _fp = os.path.join(_root, _f)
            if _f.endswith(('.png', '.jpg', '.jpeg', '.gif', '.svg', '.csv', '.json',
                            '.txt', '.pt', '.pth', '.h5', '.pkl', '.npy', '.npz',
                            '.onnx', '.mp4', '.wav', '.mp3', '.pdf')):
                try:
                    if os.path.getsize(_fp) < 50 * 1024 * 1024:
                        _collected.append(_fp)
                except OSError:
                    pass

if _collected:
    _zip_path = "/tmp/colab_artifacts.zip"
    with zipfile.ZipFile(_zip_path, 'w', zipfile.ZIP_DEFLATED) as _zf:
        for _fp in _collected:
            _zf.write(_fp, os.path.basename(_fp))
    with open(_zip_path, 'rb') as _zfh:
        _b64 = base64.b64encode(_zfh.read()).decode('ascii')
    print("ARTIFACT_BASE64_START")
    print(_b64)
    print("ARTIFACT_BASE64_END")
    print(f"[colab-gpu] Collected {len(_collected)} artifact(s)", flush=True)
else:
    print("[colab-gpu] No artifacts found to collect", flush=True)
'''
    full_code = code + "\n\n" + artifact_code
    wrapped, num_cells = _wrap_cells(full_code)
    stdout, stderr, rc = _run_on_colab(wrapped, accelerator, high_memory, timeout)
    cells = _parse_cell_output(stdout, num_cells)
    errors = [c for c in cells if c["status"] != "ok"] if rc != 0 else []

    artifact_files = []
    artifacts_zip_path = None
    b64_data = _extract_artifact_b64(stdout)
    if b64_data:
        try:
            zip_bytes = base64.b64decode(b64_data)
            artifacts_zip_path = os.path.join(output_dir, "colab_artifacts.zip")
            with open(artifacts_zip_path, "wb") as f:
                f.write(zip_bytes)
            artifact_files = _safe_extract_zip(artifacts_zip_path, output_dir)
        except Exception as e:
            errors.append({"artifact_error": str(e)})

    return json.dumps({
        "cells": cells, "errors": errors, "artifacts_zip": artifacts_zip_path,
        "artifact_files": artifact_files, "stderr": stderr, "exit_code": rc,
    }, indent=2)


def main():
    mcp.run(transport="stdio")


if __name__ == "__main__":
    main()

mcp_colab_gpu-0.2.0/pyproject.toml
@@ -0,0 +1,37 @@
[project]
name = "mcp-colab-gpu"
version = "0.2.0"
description = "MCP server for executing Python on Google Colab GPU/TPU runtimes — supports all Colab Pro accelerators (T4/L4/A100/H100/G4) with high-memory option and security hardening. Fork of mcp-server-colab-exec by Paritosh Dwivedi."
requires-python = ">=3.10"
license = "MIT"
authors = [
    {name = "Masaya Hirano"},
]
maintainers = [
    {name = "Masaya Hirano"},
]
keywords = ["colab", "cuda", "gpu", "tpu", "machine-learning", "mcp", "colab-pro"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
dependencies = [
    "google-auth>=2.0",
    "google-auth-oauthlib>=1.0",
    "mcp[cli]>=1.6.0",
    "requests>=2.28",
    "websocket-client>=1.5",
]

[project.urls]
Homepage = "https://github.com/mio-github/mcp-colab-gpu"
Repository = "https://github.com/mio-github/mcp-colab-gpu"
"Original Project" = "https://github.com/pdwi2020/mcp-server-colab-exec"

[project.scripts]
mcp-colab-gpu = "mcp_colab_gpu.server:main"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"