openmodal 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openmodal-0.3.0/.github/workflows/docs.yml +18 -0
- openmodal-0.3.0/.gitignore +8 -0
- openmodal-0.3.0/PKG-INFO +12 -0
- openmodal-0.3.0/README.md +39 -0
- openmodal-0.3.0/docs/examples/harbor.md +78 -0
- openmodal-0.3.0/docs/examples/hello_world.md +65 -0
- openmodal-0.3.0/docs/examples/index.md +28 -0
- openmodal-0.3.0/docs/examples/sandbox.md +87 -0
- openmodal-0.3.0/docs/examples/sft_finetune.md +88 -0
- openmodal-0.3.0/docs/examples/vllm_serving.md +115 -0
- openmodal-0.3.0/docs/examples/web_scraper.md +117 -0
- openmodal-0.3.0/docs/index.md +39 -0
- openmodal-0.3.0/docs/providers.md +101 -0
- openmodal-0.3.0/docs/roadmap.md +56 -0
- openmodal-0.3.0/docs/setup.md +58 -0
- openmodal-0.3.0/examples/hello_world.py +33 -0
- openmodal-0.3.0/examples/sandbox.py +82 -0
- openmodal-0.3.0/examples/sft_finetune.py +238 -0
- openmodal-0.3.0/examples/vllm_serving.py +58 -0
- openmodal-0.3.0/examples/webscraper.py +24 -0
- openmodal-0.3.0/examples/webscraper_requests.py +26 -0
- openmodal-0.3.0/mkdocs.yml +46 -0
- openmodal-0.3.0/pyproject.toml +41 -0
- openmodal-0.3.0/src/openmodal/__init__.py +11 -0
- openmodal-0.3.0/src/openmodal/_async_utils.py +70 -0
- openmodal-0.3.0/src/openmodal/_decorators.py +22 -0
- openmodal-0.3.0/src/openmodal/app.py +110 -0
- openmodal-0.3.0/src/openmodal/cli/__init__.py +54 -0
- openmodal-0.3.0/src/openmodal/cli/console.py +55 -0
- openmodal-0.3.0/src/openmodal/cli/deploy.py +44 -0
- openmodal-0.3.0/src/openmodal/cli/ps.py +22 -0
- openmodal-0.3.0/src/openmodal/cli/run.py +136 -0
- openmodal-0.3.0/src/openmodal/cli/stop.py +25 -0
- openmodal-0.3.0/src/openmodal/function.py +31 -0
- openmodal-0.3.0/src/openmodal/image.py +144 -0
- openmodal-0.3.0/src/openmodal/integrations/__init__.py +0 -0
- openmodal-0.3.0/src/openmodal/integrations/harbor_env.py +403 -0
- openmodal-0.3.0/src/openmodal/process.py +31 -0
- openmodal-0.3.0/src/openmodal/providers/__init__.py +28 -0
- openmodal-0.3.0/src/openmodal/providers/base.py +69 -0
- openmodal-0.3.0/src/openmodal/providers/gcp/__init__.py +0 -0
- openmodal-0.3.0/src/openmodal/providers/gcp/build.py +20 -0
- openmodal-0.3.0/src/openmodal/providers/gcp/compute.py +441 -0
- openmodal-0.3.0/src/openmodal/providers/gcp/config.py +92 -0
- openmodal-0.3.0/src/openmodal/providers/gcp/gke.py +597 -0
- openmodal-0.3.0/src/openmodal/providers/gcp/gke_setup.py +103 -0
- openmodal-0.3.0/src/openmodal/providers/gcp/network.py +26 -0
- openmodal-0.3.0/src/openmodal/providers/gcp/registry.py +19 -0
- openmodal-0.3.0/src/openmodal/providers/gcp/secrets.py +26 -0
- openmodal-0.3.0/src/openmodal/providers/gcp/storage.py +21 -0
- openmodal-0.3.0/src/openmodal/providers/local/__init__.py +232 -0
- openmodal-0.3.0/src/openmodal/remote.py +153 -0
- openmodal-0.3.0/src/openmodal/router/__init__.py +0 -0
- openmodal-0.3.0/src/openmodal/runtime/__init__.py +0 -0
- openmodal-0.3.0/src/openmodal/runtime/agent.py +102 -0
- openmodal-0.3.0/src/openmodal/runtime/startup.py +94 -0
- openmodal-0.3.0/src/openmodal/runtime/web_server.py +26 -0
- openmodal-0.3.0/src/openmodal/sandbox.py +177 -0
- openmodal-0.3.0/src/openmodal/secret.py +24 -0
- openmodal-0.3.0/src/openmodal/volume.py +49 -0
- openmodal-0.3.0/tests/__init__.py +0 -0
- openmodal-0.3.0/uv.lock +2496 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
name: Deploy docs
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
branches: [main]
|
|
5
|
+
|
|
6
|
+
permissions:
|
|
7
|
+
contents: write
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
deploy:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- uses: actions/setup-python@v5
|
|
15
|
+
with:
|
|
16
|
+
python-version: '3.12'
|
|
17
|
+
- run: pip install mkdocs-material
|
|
18
|
+
- run: mkdocs gh-deploy --force
|
openmodal-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openmodal
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Run Modal on your own cloud. Open-source, cloud-agnostic serverless GPU compute.
|
|
5
|
+
Requires-Python: ==3.12.*
|
|
6
|
+
Requires-Dist: click>=8.3
|
|
7
|
+
Requires-Dist: google-auth>=2.49
|
|
8
|
+
Requires-Dist: kubernetes>=35.0
|
|
9
|
+
Requires-Dist: requests>=2.33
|
|
10
|
+
Provides-Extra: harbor
|
|
11
|
+
Requires-Dist: harbor; extra == 'harbor'
|
|
12
|
+
Requires-Dist: tenacity>=9.0; extra == 'harbor'
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# OpenModal
|
|
2
|
+
|
|
3
|
+
A cloud-agnostic runtime that implements [Modal](https://modal.com)'s Python interface.
|
|
4
|
+
|
|
5
|
+
I built this because I wanted to run Modal on my own GCP account. Modal's API is clean and I didn't want to learn a different one. So OpenModal lets you write the same code and run it on your own infrastructure.
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
import openmodal
|
|
9
|
+
|
|
10
|
+
app = openmodal.App("my-experiment")
|
|
11
|
+
|
|
12
|
+
@app.function(gpu="H100")
|
|
13
|
+
def train(config):
|
|
14
|
+
...
|
|
15
|
+
|
|
16
|
+
results = train.map(configs)
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## What works
|
|
20
|
+
|
|
21
|
+
- `f.local()`, `f.remote()`, `f.map()`
|
|
22
|
+
- GPU serving with auto scale-to-zero
|
|
23
|
+
- Custom images, secrets, retries, volumes
|
|
24
|
+
- GKE with spot GPUs (H100, A100, L4)
|
|
25
|
+
- CLI: `openmodal run`, `deploy`, `stop`, `ps`
|
|
26
|
+
|
|
27
|
+
## Get started
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install openmodal
|
|
31
|
+
gcloud auth login
|
|
32
|
+
openmodal run examples/hello_world.py
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
[Setup guide](docs/setup.md) · [Examples](docs/examples/) · [Modal docs](https://modal.com/docs/guide) (same API, just swap the import)
|
|
36
|
+
|
|
37
|
+
## License
|
|
38
|
+
|
|
39
|
+
Apache-2.0
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Running SWE-bench with Harbor
|
|
2
|
+
|
|
3
|
+
Run SWE-bench evaluations using [Harbor](https://harborframework.com) with OpenModal as the compute backend.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install "openmodal[harbor]"
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Run
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
harbor run \
|
|
15
|
+
--agent mini-swe-agent \
|
|
16
|
+
--model openai/gpt-5.4 \
|
|
17
|
+
--environment-import-path openmodal.integrations.harbor_env:ModalEnvironment \
|
|
18
|
+
--dataset swe-bench/swe-bench-verified \
|
|
19
|
+
--n-tasks 1
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
This creates a sandbox on GKE, runs the agent against a real SWE-bench task, verifies the patch, and reports results.
|
|
23
|
+
|
|
24
|
+
## What happens
|
|
25
|
+
|
|
26
|
+
1. Harbor downloads a SWE-bench task (e.g., a Django bug)
|
|
27
|
+
2. OpenModal creates a Kubernetes pod with the task's Docker image
|
|
28
|
+
3. The agent runs inside the pod — reads the bug, edits code, runs tests
|
|
29
|
+
4. Harbor uploads test files, runs verification, reports pass/fail
|
|
30
|
+
5. Pod is cleaned up
|
|
31
|
+
|
|
32
|
+
## Options
|
|
33
|
+
|
|
34
|
+
**Different agents:**
|
|
35
|
+
```bash
|
|
36
|
+
# Claude Code
|
|
37
|
+
harbor run --agent claude-code --model anthropic/claude-sonnet-4-5-20250929 \
|
|
38
|
+
--environment-import-path openmodal.integrations.harbor_env:ModalEnvironment \
|
|
39
|
+
--dataset swe-bench/swe-bench-verified --n-tasks 5
|
|
40
|
+
|
|
41
|
+
# OpenHands
|
|
42
|
+
harbor run --agent openhands --model openai/gpt-5.4 \
|
|
43
|
+
--environment-import-path openmodal.integrations.harbor_env:ModalEnvironment \
|
|
44
|
+
--dataset swe-bench/swe-bench-verified --n-tasks 5
|
|
45
|
+
|
|
46
|
+
# SWE-agent
|
|
47
|
+
harbor run --agent swe-agent --model openai/gpt-5.4 \
|
|
48
|
+
--environment-import-path openmodal.integrations.harbor_env:ModalEnvironment \
|
|
49
|
+
--dataset swe-bench/swe-bench-verified --n-tasks 5
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
**Multiple attempts:**
|
|
53
|
+
```bash
|
|
54
|
+
harbor run --agent mini-swe-agent --model openai/gpt-5.4 \
|
|
55
|
+
--environment-import-path openmodal.integrations.harbor_env:ModalEnvironment \
|
|
56
|
+
--dataset swe-bench/swe-bench-verified --n-tasks 10 --n-attempts 3
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
**View results:**
|
|
60
|
+
```bash
|
|
61
|
+
harbor view jobs
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## How it compares to Modal
|
|
65
|
+
|
|
66
|
+
The only difference from running Harbor with Modal is the `--environment-import-path` flag. With Modal:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
harbor run --agent mini-swe-agent --model openai/gpt-5.4 --env modal --dataset swe-bench/swe-bench-verified
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
With OpenModal:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
harbor run --agent mini-swe-agent --model openai/gpt-5.4 --environment-import-path openmodal.integrations.harbor_env:ModalEnvironment --dataset swe-bench/swe-bench-verified
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Same agents, same datasets, same results — just runs on your own GCP infrastructure.
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Hello, world!
|
|
2
|
+
|
|
3
|
+
This example demonstrates the core features of OpenModal:
|
|
4
|
+
|
|
5
|
+
- Run functions locally with `f.local()`
|
|
6
|
+
- Run functions remotely on GCP with `f.remote()`
|
|
7
|
+
- Run functions in parallel with `f.map()`
|
|
8
|
+
|
|
9
|
+
## The code
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
import sys
|
|
13
|
+
import openmodal
|
|
14
|
+
|
|
15
|
+
app = openmodal.App("example-hello-world")
|
|
16
|
+
|
|
17
|
+
@app.function()
|
|
18
|
+
def f(i):
|
|
19
|
+
if i % 2 == 0:
|
|
20
|
+
print("hello", i)
|
|
21
|
+
else:
|
|
22
|
+
print("world", i, file=sys.stderr)
|
|
23
|
+
return i * i
|
|
24
|
+
|
|
25
|
+
@app.local_entrypoint()
|
|
26
|
+
def main():
|
|
27
|
+
# Run locally
|
|
28
|
+
print(f.local(1000))
|
|
29
|
+
|
|
30
|
+
# Run remotely on GCP
|
|
31
|
+
print(f.remote(1000))
|
|
32
|
+
|
|
33
|
+
# Run in parallel on GCP
|
|
34
|
+
total = 0
|
|
35
|
+
for ret in f.map(range(200)):
|
|
36
|
+
total += ret
|
|
37
|
+
print(total)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Run it
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
openmodal run examples/hello_world.py
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
✓ Initialized.
|
|
48
|
+
✓ Created objects.
|
|
49
|
+
✓ Container created. (2 vCPU, 2 GB RAM • 34.135.113.28 • 14s)
|
|
50
|
+
✓ Container ready. (60s total)
|
|
51
|
+
hello 1000
|
|
52
|
+
1000000
|
|
53
|
+
1000000
|
|
54
|
+
2646700
|
|
55
|
+
✓ Containers cleaned up.
|
|
56
|
+
✓ App completed.
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## What happened?
|
|
60
|
+
|
|
61
|
+
1. `f.local(1000)` ran on your machine — printed `hello 1000`, returned `1000000`
|
|
62
|
+
2. `f.remote(1000)` created a GCE container, sent the function call to it, and returned the result
|
|
63
|
+
3. `f.map(range(200))` sent 200 calls to the remote container in parallel and streamed results back
|
|
64
|
+
|
|
65
|
+
The container was automatically cleaned up when the script finished.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Examples
|
|
2
|
+
|
|
3
|
+
OpenModal is API-compatible with Modal. If you've seen a Modal example, you can run it
|
|
4
|
+
on OpenModal by replacing `import modal` with `import openmodal`.
|
|
5
|
+
|
|
6
|
+
For hundreds of additional examples, see the [Modal examples gallery](https://modal.com/docs/examples) —
|
|
7
|
+
the same code works with OpenModal on your own GCP infrastructure.
|
|
8
|
+
|
|
9
|
+
## Getting started
|
|
10
|
+
|
|
11
|
+
- [Hello, world!](hello_world.md) — `f.local()`, `f.remote()`, `f.map()`
|
|
12
|
+
- [Web scraper](web_scraper.md) — custom images, async, parallel execution, CLI args
|
|
13
|
+
|
|
14
|
+
## GPU serving
|
|
15
|
+
|
|
16
|
+
- [vLLM serving](vllm_serving.md) — deploy a quantized LLM on H100 with auto scale-to-zero
|
|
17
|
+
|
|
18
|
+
## Sandboxes
|
|
19
|
+
|
|
20
|
+
- [Sandboxes](sandbox.md) — isolated containers for SWE agents, parallel execution
|
|
21
|
+
|
|
22
|
+
## Training
|
|
23
|
+
|
|
24
|
+
- [SFT finetuning](sft_finetune.md) — LoRA finetuning with Unsloth on a single H100
|
|
25
|
+
|
|
26
|
+
## Benchmarks
|
|
27
|
+
|
|
28
|
+
- [SWE-bench with Harbor](harbor.md) — run SWE-bench evaluations on your own GCP
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Sandboxes
|
|
2
|
+
|
|
3
|
+
Sandboxes are isolated containers you can exec commands into — like SSH into a fresh machine. They're used by SWE agents to run code, edit files, and run tests in a clean environment.
|
|
4
|
+
|
|
5
|
+
## Basic usage
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
import openmodal
|
|
9
|
+
|
|
10
|
+
app = openmodal.App("my-agent")
|
|
11
|
+
image = openmodal.Image.debian_slim().apt_install("git").pip_install("requests")
|
|
12
|
+
|
|
13
|
+
sandbox = openmodal.Sandbox.create(image=image, app=app, timeout=300)
|
|
14
|
+
|
|
15
|
+
result = sandbox.exec("echo hello")
|
|
16
|
+
print(result.output) # "hello"
|
|
17
|
+
print(result.returncode) # 0
|
|
18
|
+
|
|
19
|
+
sandbox.exec("git clone https://github.com/pallets/click.git /workspace")
|
|
20
|
+
sandbox.exec("cd /workspace && python3 -m pytest tests/")
|
|
21
|
+
|
|
22
|
+
sandbox.terminate()
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## How it works
|
|
26
|
+
|
|
27
|
+
Each sandbox is a Kubernetes pod on GKE:
|
|
28
|
+
|
|
29
|
+
1. `Sandbox.create()` → creates a pod with your image, keeps it alive with `sleep`
|
|
30
|
+
2. `sandbox.exec(command)` → runs bash commands inside the pod via Kubernetes exec API
|
|
31
|
+
3. Files persist between execs — the pod stays alive until you terminate it
|
|
32
|
+
4. `sandbox.terminate()` → deletes the pod
|
|
33
|
+
|
|
34
|
+
## Parallel sandboxes
|
|
35
|
+
|
|
36
|
+
Multiple sandboxes run simultaneously on the same cluster. Each is fully isolated.
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
import concurrent.futures
|
|
40
|
+
|
|
41
|
+
def run_agent(agent_id):
|
|
42
|
+
sandbox = openmodal.Sandbox.create(image=image, app=app)
|
|
43
|
+
sandbox.exec(f"echo 'agent {agent_id}' > /tmp/id.txt")
|
|
44
|
+
result = sandbox.exec("cat /tmp/id.txt")
|
|
45
|
+
sandbox.terminate()
|
|
46
|
+
return result.output
|
|
47
|
+
|
|
48
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
|
|
49
|
+
results = list(pool.map(run_agent, range(4)))
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Performance
|
|
53
|
+
|
|
54
|
+
With cached images on warm nodes:
|
|
55
|
+
|
|
56
|
+
| Operation | Latency |
|
|
57
|
+
|---|---|
|
|
58
|
+
| `Sandbox.create()` | ~5s |
|
|
59
|
+
| `sandbox.exec()` | ~0.2s |
|
|
60
|
+
| `sandbox.terminate()` | instant |
|
|
61
|
+
| 4 parallel sandboxes | ~5s (not 4x5s) |
|
|
62
|
+
|
|
63
|
+
First run builds the image (~2-3 min via Cloud Build). After that, the image is cached and creation is fast.
|
|
64
|
+
|
|
65
|
+
## Isolation
|
|
66
|
+
|
|
67
|
+
Each sandbox is its own pod:
|
|
68
|
+
- Separate filesystem — files in one sandbox don't appear in another
|
|
69
|
+
- Separate processes — nothing shared between sandboxes
|
|
70
|
+
- Separate network — each pod gets its own IP
|
|
71
|
+
|
|
72
|
+
## Run the example
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
openmodal run examples/sandbox.py
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
Launching 4 sandboxes in parallel...
|
|
80
|
+
|
|
81
|
+
Agent 0 (sandbox-test-4bb1bb27): create=5.3s total=11.3s
|
|
82
|
+
Agent 1 (sandbox-test-6f4a83f3): create=5.3s total=11.5s
|
|
83
|
+
Agent 2 (sandbox-test-c5696129): create=5.2s total=11.7s
|
|
84
|
+
Agent 3 (sandbox-test-c8ecb817): create=4.2s total=10.4s
|
|
85
|
+
|
|
86
|
+
All 4 agents passed in 11.7s
|
|
87
|
+
```
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# SFT finetuning with Unsloth
|
|
2
|
+
|
|
3
|
+
Finetune an LLM with LoRA on a single GPU using [Unsloth](https://github.com/unslothai/unsloth)'s optimized training. Based on [Modal's Unsloth example](https://modal.com/docs/examples/unsloth-finetune).
|
|
4
|
+
|
|
5
|
+
## Run
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
openmodal run examples/sft_finetune.py
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
With custom settings:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
openmodal run examples/sft_finetune.py --model-name unsloth/Qwen3-4B --max-steps 1000 --lora-r 32
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## What it does
|
|
18
|
+
|
|
19
|
+
1. Spins up an H100 GPU on GKE
|
|
20
|
+
2. Downloads Qwen3-4B (4-bit quantized) and the FineTome-100k dataset
|
|
21
|
+
3. Applies LoRA adapters and trains with Unsloth's optimized kernels
|
|
22
|
+
4. Saves checkpoints and final model to persistent GCS volumes
|
|
23
|
+
5. Supports resuming from checkpoints if interrupted
|
|
24
|
+
|
|
25
|
+
## The code
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
import openmodal
|
|
29
|
+
|
|
30
|
+
app = openmodal.App("sft-finetune")
|
|
31
|
+
|
|
32
|
+
train_image = (
|
|
33
|
+
openmodal.Image.debian_slim()
|
|
34
|
+
.uv_pip_install(
|
|
35
|
+
"accelerate", "datasets", "peft",
|
|
36
|
+
"transformers", "trl",
|
|
37
|
+
"unsloth[cu128-torch270]",
|
|
38
|
+
)
|
|
39
|
+
.env({"HF_HOME": "/model_cache"})
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
model_cache = openmodal.Volume.from_name("sft-model-cache", create_if_missing=True)
|
|
43
|
+
checkpoints = openmodal.Volume.from_name("sft-checkpoints", create_if_missing=True)
|
|
44
|
+
|
|
45
|
+
@app.function(
|
|
46
|
+
image=train_image,
|
|
47
|
+
gpu="H100",
|
|
48
|
+
volumes={"/model_cache": model_cache, "/checkpoints": checkpoints},
|
|
49
|
+
timeout=6 * 60 * 60,
|
|
50
|
+
retries=3,
|
|
51
|
+
)
|
|
52
|
+
def finetune(config):
|
|
53
|
+
from unsloth import FastLanguageModel
|
|
54
|
+
from trl import SFTTrainer
|
|
55
|
+
|
|
56
|
+
model, tokenizer = FastLanguageModel.from_pretrained(
|
|
57
|
+
model_name=config.model_name,
|
|
58
|
+
load_in_4bit=True,
|
|
59
|
+
)
|
|
60
|
+
# ... LoRA setup, dataset loading, training ...
|
|
61
|
+
trainer.train()
|
|
62
|
+
model.save_pretrained("/checkpoints/final_model")
|
|
63
|
+
|
|
64
|
+
@app.local_entrypoint()
|
|
65
|
+
def main(model_name="unsloth/Qwen3-4B", max_steps=5):
|
|
66
|
+
finetune.remote(TrainingConfig(model_name=model_name, max_steps=max_steps))
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Features used
|
|
70
|
+
|
|
71
|
+
| Feature | How it's used |
|
|
72
|
+
|---|---|
|
|
73
|
+
| `gpu="H100"` | Single H100 for training |
|
|
74
|
+
| `Volume.from_name(create_if_missing=True)` | Persistent storage for model weights, datasets, checkpoints |
|
|
75
|
+
| `retries=3` | Auto-retry on preemption (spot instances) |
|
|
76
|
+
| `timeout=6*60*60` | 6 hour max training time |
|
|
77
|
+
| `finetune.remote(config)` | Runs training on the cloud GPU |
|
|
78
|
+
| CLI args (`--max-steps`, `--lora-r`) | Tweak hyperparameters from command line |
|
|
79
|
+
|
|
80
|
+
## Compared to Modal
|
|
81
|
+
|
|
82
|
+
The only difference from Modal's example is the import line:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
import openmodal # instead of: import modal
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Everything else — image definition, volumes, GPU selection, remote execution — is the same API.
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# GPU serving with vLLM
|
|
2
|
+
|
|
3
|
+
Deploy a model on a GPU and get an OpenAI-compatible endpoint. Scales to zero when idle.
|
|
4
|
+
|
|
5
|
+
## The code
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
import openmodal
|
|
9
|
+
|
|
10
|
+
MODEL_NAME = "Qwen/Qwen3.5-0.8B"
|
|
11
|
+
|
|
12
|
+
vllm_image = (
|
|
13
|
+
openmodal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.12")
|
|
14
|
+
.entrypoint([])
|
|
15
|
+
.apt_install("git")
|
|
16
|
+
.uv_pip_install("vllm", "huggingface-hub==0.36.0",
|
|
17
|
+
extra_options="--extra-index-url https://wheels.vllm.ai/nightly")
|
|
18
|
+
.pip_install("transformers @ git+https://github.com/huggingface/transformers.git@main")
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
app = openmodal.App("vllm-test")
|
|
22
|
+
|
|
23
|
+
@app.function(
|
|
24
|
+
image=vllm_image,
|
|
25
|
+
gpu="H100",
|
|
26
|
+
scaledown_window=5 * 60,
|
|
27
|
+
timeout=10 * 60,
|
|
28
|
+
)
|
|
29
|
+
@openmodal.web_server(port=8000, startup_timeout=20 * 60)
|
|
30
|
+
@openmodal.concurrent(max_inputs=8)
|
|
31
|
+
def serve():
|
|
32
|
+
import subprocess
|
|
33
|
+
subprocess.Popen([
|
|
34
|
+
"vllm", "serve", MODEL_NAME,
|
|
35
|
+
"--host", "0.0.0.0", "--port", "8000",
|
|
36
|
+
"--served-model-name", MODEL_NAME,
|
|
37
|
+
"--max-model-len", "4096",
|
|
38
|
+
"--enforce-eager",
|
|
39
|
+
])
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Deploy
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
openmodal deploy examples/vllm_serving.py
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
openmodal deploy: vllm-test
|
|
50
|
+
building image...
|
|
51
|
+
image: us-central1-docker.pkg.dev/.../vllm-test:a9b8fa41ec13
|
|
52
|
+
creating container (H100)...
|
|
53
|
+
waiting for healthy (timeout: 1200s)...
|
|
54
|
+
serve => http://104.155.171.209:8000
|
|
55
|
+
deploy complete.
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Query
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
curl http://104.155.171.209:8000/v1/chat/completions \
|
|
62
|
+
-H "Content-Type: application/json" \
|
|
63
|
+
-d '{"model":"Qwen/Qwen3.5-0.8B","messages":[{"role":"user","content":"What is 2+2?"}],"max_tokens":16}'
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Works with any OpenAI client:
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from openai import OpenAI
|
|
70
|
+
client = OpenAI(base_url="http://104.155.171.209:8000/v1", api_key="unused")
|
|
71
|
+
resp = client.chat.completions.create(
|
|
72
|
+
model="Qwen/Qwen3.5-0.8B",
|
|
73
|
+
messages=[{"role": "user", "content": "What is 2+2?"}],
|
|
74
|
+
)
|
|
75
|
+
print(resp.choices[0].message.content)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Stop
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
openmodal stop vllm-test
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Under the hood
|
|
85
|
+
|
|
86
|
+
When you run `openmodal deploy`, here's what happens:
|
|
87
|
+
|
|
88
|
+
**Building the image**
|
|
89
|
+
|
|
90
|
+
Your image definition (`debian_slim().pip_install(...)`) gets turned into a Dockerfile and built via Google Cloud Build. The built image is stored in Artifact Registry. If you deploy the same code again, the image is already cached and this step is skipped.
|
|
91
|
+
|
|
92
|
+
**Starting the server**
|
|
93
|
+
|
|
94
|
+
OpenModal sees `gpu="H100"` + `@web_server` and picks GKE (Kubernetes) as the backend. It creates three things:
|
|
95
|
+
|
|
96
|
+
- A **Deployment** — tells Kubernetes "run one copy of this container with an H100 GPU"
|
|
97
|
+
- A **Service** — gives it a public IP so you can send requests to it
|
|
98
|
+
- A **CronJob** — checks every minute if anyone is using the server
|
|
99
|
+
|
|
100
|
+
GKE doesn't have an H100 machine sitting around, so it provisions one (a spot instance, ~60% cheaper). This takes a few minutes. Once the machine is ready, your container starts, vLLM loads the model, and the health check passes.
|
|
101
|
+
|
|
102
|
+
**Scaling down**
|
|
103
|
+
|
|
104
|
+
The CronJob runs every minute and checks: are there any active TCP connections to port 8000? If there haven't been any for `scaledown_window` seconds (5 min in this example), it scales the Deployment to 0 — meaning the container is stopped.
|
|
105
|
+
|
|
106
|
+
Once the container is gone, the H100 machine has nothing running on it. GKE's node autoscaler notices this and removes the machine after ~5 minutes. Now you're paying $0 for GPUs.
|
|
107
|
+
|
|
108
|
+
**Costs**
|
|
109
|
+
|
|
110
|
+
| State | What you pay |
|
|
111
|
+
|---|---|
|
|
112
|
+
| Serving requests | ~$1.20/hr (H100 spot) |
|
|
113
|
+
| Idle, within scaledown window | Same |
|
|
114
|
+
| Scaled to zero | ~$0.10/hr (cluster overhead) |
|
|
115
|
+
| Cluster deleted | $0 |
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# Web scraper
|
|
2
|
+
|
|
3
|
+
This example shows how to build a distributed web scraper with OpenModal,
|
|
4
|
+
progressing from a simple local script to parallel remote execution with
|
|
5
|
+
custom container images.
|
|
6
|
+
|
|
7
|
+
## Step 1: Scrape links locally
|
|
8
|
+
|
|
9
|
+
Start with plain Python:
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
import re
|
|
13
|
+
import urllib.request
|
|
14
|
+
|
|
15
|
+
def get_links(url):
|
|
16
|
+
response = urllib.request.urlopen(url)
|
|
17
|
+
html = response.read().decode("utf8")
|
|
18
|
+
links = []
|
|
19
|
+
for match in re.finditer('href="(.*?)"', html):
|
|
20
|
+
links.append(match.group(1))
|
|
21
|
+
return links
|
|
22
|
+
|
|
23
|
+
if __name__ == "__main__":
|
|
24
|
+
print(get_links("http://example.com"))
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
python webscraper.py
|
|
29
|
+
# ['https://www.iana.org/domains/example']
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Step 2: Run it remotely
|
|
33
|
+
|
|
34
|
+
Add OpenModal — the only changes are the import, the decorator, and the entrypoint:
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
import re
|
|
38
|
+
import urllib.request
|
|
39
|
+
import openmodal
|
|
40
|
+
|
|
41
|
+
app = openmodal.App(name="example-webscraper")
|
|
42
|
+
|
|
43
|
+
@app.function()
|
|
44
|
+
def get_links(url):
|
|
45
|
+
response = urllib.request.urlopen(url)
|
|
46
|
+
html = response.read().decode("utf8")
|
|
47
|
+
links = []
|
|
48
|
+
for match in re.finditer('href="(.*?)"', html):
|
|
49
|
+
links.append(match.group(1))
|
|
50
|
+
return links
|
|
51
|
+
|
|
52
|
+
@app.local_entrypoint()
|
|
53
|
+
def main(url: str = "http://example.com"):
|
|
54
|
+
links = get_links.remote(url)
|
|
55
|
+
print(links)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
openmodal run examples/webscraper.py --url http://example.com
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
✓ Initialized.
|
|
64
|
+
✓ Created objects.
|
|
65
|
+
['https://www.iana.org/domains/example']
|
|
66
|
+
✓ App completed.
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
The function ran on a GCE container, not on your machine.
|
|
70
|
+
|
|
71
|
+
## Step 3: Add dependencies with a custom image
|
|
72
|
+
|
|
73
|
+
Use `requests` and `beautifulsoup4` for better HTML parsing.
|
|
74
|
+
Define a custom container image with the dependencies:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import openmodal
|
|
78
|
+
|
|
79
|
+
app = openmodal.App("example-webscraper-requests")
|
|
80
|
+
|
|
81
|
+
scraper_image = openmodal.Image.debian_slim().pip_install("requests", "beautifulsoup4")
|
|
82
|
+
|
|
83
|
+
@app.function(image=scraper_image)
|
|
84
|
+
async def get_links(url: str) -> list[str]:
|
|
85
|
+
import asyncio
|
|
86
|
+
import requests
|
|
87
|
+
from bs4 import BeautifulSoup
|
|
88
|
+
|
|
89
|
+
resp = await asyncio.to_thread(requests.get, url, timeout=10)
|
|
90
|
+
soup = BeautifulSoup(resp.text, "html.parser")
|
|
91
|
+
return [a["href"] for a in soup.find_all("a", href=True)]
|
|
92
|
+
|
|
93
|
+
@app.local_entrypoint()
|
|
94
|
+
def main():
|
|
95
|
+
urls = ["http://example.com", "http://modal.com"]
|
|
96
|
+
for links in get_links.map(urls):
|
|
97
|
+
for link in links:
|
|
98
|
+
print(link)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
openmodal run examples/webscraper_requests.py
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
The first run builds the Docker image (takes ~2 minutes). Subsequent runs
|
|
106
|
+
use the cached image and start much faster.
|
|
107
|
+
|
|
108
|
+
## What this demonstrates
|
|
109
|
+
|
|
110
|
+
| Feature | How it's used |
|
|
111
|
+
|---|---|
|
|
112
|
+
| `f.remote(url)` | Run a single function call on GCP |
|
|
113
|
+
| `f.map(urls)` | Run multiple calls in parallel |
|
|
114
|
+
| `Image.debian_slim()` | Base container image with Python |
|
|
115
|
+
| `.pip_install(...)` | Add Python packages to the image |
|
|
116
|
+
| `async def` | Async functions work transparently |
|
|
117
|
+
| CLI args (`--url`) | Entrypoint parameters become CLI flags |
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# OpenModal
|
|
2
|
+
|
|
3
|
+
A cloud-agnostic runtime that implements [Modal](https://modal.com)'s Python interface.
|
|
4
|
+
|
|
5
|
+
I built this because I wanted to run Modal on my own GCP account. Modal's API is clean and I didn't want to learn a different one. So OpenModal lets you write the same code and run it on your own infrastructure.
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
import openmodal
|
|
9
|
+
|
|
10
|
+
app = openmodal.App("my-experiment")
|
|
11
|
+
|
|
12
|
+
@app.function(gpu="H100")
|
|
13
|
+
def train(config):
|
|
14
|
+
...
|
|
15
|
+
|
|
16
|
+
results = train.map(configs)
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## What works
|
|
20
|
+
|
|
21
|
+
- `f.local()`, `f.remote()`, `f.map()`
|
|
22
|
+
- GPU serving with auto scale-to-zero
|
|
23
|
+
- Custom images, secrets, retries, volumes
|
|
24
|
+
- GKE with spot GPUs (H100, A100, L4)
|
|
25
|
+
- CLI: `openmodal run`, `deploy`, `stop`, `ps`
|
|
26
|
+
|
|
27
|
+
## Get started
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install openmodal
|
|
31
|
+
gcloud auth login
|
|
32
|
+
openmodal run examples/hello_world.py
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
[Setup guide](setup.md) · [Examples](examples/index.md) · [Modal docs](https://modal.com/docs/guide) (same API, just swap the import)
|
|
36
|
+
|
|
37
|
+
## License
|
|
38
|
+
|
|
39
|
+
Apache-2.0
|