bellhop-py 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ __pycache__/
2
+ *.pyc
3
+ .venv/
4
+ dist/
5
+ build/
6
+ *.egg-info/
7
+ .pytest_cache/
8
+ .env
9
+ experiments/
10
+ _testcode/results/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Daniel Tan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,285 @@
1
+ Metadata-Version: 2.4
2
+ Name: bellhop-py
3
+ Version: 0.3.0
4
+ Summary: Async Python library that checks your code into an ephemeral box (RunPod pod or Modal sandbox), runs it, brings the results back, and checks out.
5
+ Project-URL: Homepage, https://github.com/dtch1997/bellhop
6
+ Project-URL: Repository, https://github.com/dtch1997/bellhop
7
+ Project-URL: Issues, https://github.com/dtch1997/bellhop/issues
8
+ Author: Daniel Tan
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: compute,ephemeral,gpu,modal,runpod,sandbox
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Framework :: AsyncIO
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Classifier: Topic :: System :: Distributed Computing
18
+ Requires-Python: >=3.10
19
+ Requires-Dist: httpx>=0.27
20
+ Provides-Extra: dev
21
+ Requires-Dist: modal>=1.0; extra == 'dev'
22
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
23
+ Requires-Dist: pytest>=8; extra == 'dev'
24
+ Provides-Extra: modal
25
+ Requires-Dist: modal>=1.0; extra == 'modal'
26
+ Description-Content-Type: text/markdown
27
+
28
+ # bellhop
29
+
30
+ **Check your code into an ephemeral box, run it, bring the results back, and
31
+ check out** — an async Python library for disposable compute. Two backends:
32
+ a [RunPod](https://runpod.io) pod or a [Modal](https://modal.com) sandbox.
33
+
34
+ Like a hotel bellhop: it books a room (provisions the box), waits until it's
35
+ actually ready, carries your luggage up (uploads your code), and when you leave
36
+ it brings your bags back down (pulls results) and checks out (tears the box
37
+ down) — so you never leave a box (or a bill) running by accident.
38
+
39
+ ```python
40
+ import asyncio
41
+ from bellhop import pod, PodConfig
42
+
43
+ async def main():
44
+ async with pod(PodConfig(gpu="RTX4090")) as p:
45
+ await p.push("./mycode", "/workspace/job")
46
+ r = await p.exec("cd /workspace/job && python train.py")
47
+ print(r.stdout)
48
+ await p.pull("/workspace/job/out", "./results")
49
+ # pod is gone here — even if the body raised
50
+
51
+ asyncio.run(main())
52
+ ```
53
+
54
+ The same code runs on Modal by swapping the config — `sandbox(ModalConfig(...))`
55
+ instead of `pod(PodConfig(...))` (see [Two backends](#two-backends) below).
56
+
57
+ The RunPod backend talks to the RunPod **REST API** (`rest.runpod.io/v1`)
58
+ directly over `httpx`, falling back to the **GraphQL API** only to set native
59
+ safety timers. No `runpodctl`, no vendored SDK. The Modal backend drives a
60
+ Modal **Sandbox** via the `modal` SDK.
61
+
62
+ ## Install
63
+
64
+ ```bash
65
+ pip install bellhop-py # RunPod backend (or: pip install git+https://github.com/dtch1997/bellhop)
66
+ pip install 'bellhop-py[modal]' # add the Modal backend
67
+ ```
68
+
69
+ (The PyPI distribution is `bellhop-py` — the bare `bellhop` name is an
70
+ unrelated package — but the import name and CLI are plain `bellhop`.)
71
+
72
+ For the **RunPod** backend, set `RUNPOD_API_KEY`. Connection uses your SSH
73
+ keypair (`~/.ssh/id_ed25519` by default): bellhop injects the public key as the
74
+ pod's `PUBLIC_KEY` env so `root@pod` is reachable. For the **Modal** backend,
75
+ configure Modal auth (`modal token new`, or `MODAL_TOKEN_ID` /
76
+ `MODAL_TOKEN_SECRET`). (GCS upload, if you enable it, needs `gcloud` on your
77
+ `PATH` either way.)
78
+
79
+ ## Two backends
80
+
81
+ Both backends implement the same `ExecBox` contract — `exec` / `push` / `pull`
82
+ / `exists_remote` / `teardown` — so the high-level `run()` / `run_many()`
83
+ pipeline (below) is provider-agnostic: hand it a `PodConfig` for RunPod or a
84
+ `ModalConfig` for Modal and everything else is identical.
85
+
86
+ ```python
87
+ from bellhop import sandbox, ModalConfig
88
+
89
+ async with sandbox(ModalConfig(gpu="A10G")) as b: # CPU box: omit gpu
90
+ await b.push("./mycode", "/workspace/job")
91
+ r = await b.exec("cd /workspace/job && python train.py")
92
+ print(r.stdout)
93
+ await b.pull("/workspace/job/out", "./results")
94
+ # sandbox terminated on exit (pass keep=True to leave it up)
95
+ ```
96
+
97
+ The whole common surface is spelled the same on both configs:
98
+
99
+ - **`gpu=`** — a canonical short name (`"A100"`, `"H100"`, `"L4"`, …); `None`
100
+ means a CPU box. On RunPod the name expands through `GPU_ALIASES` to the
101
+ *list* of matching gpuTypeIds (e.g. `"A100"` → PCIe *and* SXM), which the
102
+ REST API accepts wholesale — better stock availability than naming one SKU.
103
+ A full RunPod id (`"NVIDIA GeForce RTX 4090"`) still passes verbatim.
104
+ - **`max_lifetime=`** — the hard server-side kill switch, `timedelta` on both
105
+ (maps to `terminate_after` on RunPod, `timeout` on Modal).
106
+ - **`image=` / `image_preset=`** — the `pytorch-cuda` preset is pinned to the
107
+ same torch 2.4.0 + CUDA 12.4 environment on both backends.
108
+
109
+ What genuinely differs stays backend-specific:
110
+
111
+ | | RunPod (`PodConfig`, `pod()`) | Modal (`ModalConfig`, `sandbox()`) |
112
+ |---|---|---|
113
+ | Readiness | SSH/probe wait (below) | none — `create()` returns an execable box |
114
+ | Extra TTL | `stop_after` (wall-clock compute halt) | `idle_timeout` (kill after inactivity) |
115
+ | Image extras | — | `pip=` / `apt=`, `modal.Image`, `secrets=`, `volumes=` |
116
+ | Placement | `cloud=` SECURE/COMMUNITY (+fallback) | `region=`, `cpu=`, `memory=` |
117
+ | Auth | `RUNPOD_API_KEY` + SSH keypair | Modal token (`modal token new`) |
118
+
119
+ (`stop_after` and `idle_timeout` are deliberately *not* unified — one is a
120
+ wall-clock timer, the other an inactivity timer; pretending they're the same
121
+ concept would be a trap. `gpu_id=` remains as a legacy spelling of a verbatim
122
+ RunPod id.)
123
+
124
+ ## "Return when functional" — the hard part (RunPod only)
125
+
126
+ `desiredStatus == RUNNING` is necessary but **not sufficient**: sshd / your
127
+ server typically lags the RUNNING state by 30–60s. So once a pod is routable
128
+ (RUNNING + public IP + mapped port), bellhop runs a **readiness probe** until it
129
+ passes before handing the pod to you. "Functional" is caller-specific, so it's
130
+ pluggable:
131
+
132
+ ```python
133
+ from bellhop import SshProbe, TcpProbe, HttpProbe, LogMarkerProbe
134
+
135
+ PodConfig(..., ready=SshProbe("true")) # ssh job pods (default)
136
+ PodConfig(..., ready=HttpProbe(8000, "/health")) # a served endpoint
137
+ PodConfig(..., ready=LogMarkerProbe("server up")) # headless pods
138
+ ```
139
+
140
+ (Modal sandboxes are execable as soon as `create()` returns, so there's no
141
+ probe step on that backend.)
142
+
143
+ ## Two ways to use it
144
+
145
+ ### Composable pod — multi-step / interactive
146
+
147
+ Keep one pod alive and run many steps against it:
148
+
149
+ ```python
150
+ async with pod(PodConfig(gpu="RTX4090")) as p:
151
+ await p.push("./code", "/workspace/job")
152
+ await p.exec("cd /workspace/job && python train.py", env={"HF_TOKEN": tok})
153
+ await p.exec("python eval.py") # same pod, no re-provision
154
+ await p.pull("/workspace/job/results", "./out")
155
+ print(p.proxy_url(8000)) # https://<id>-8000.proxy.runpod.net
156
+ # torn down on exit (pass keep=True to leave it up)
157
+ ```
158
+
159
+ ### One-shot — provision, run, collect, done
160
+
161
+ ```python
162
+ import asyncio
163
+ from bellhop import run, RunSpec, PodConfig
164
+
165
+ res = asyncio.run(run(
166
+ RunSpec(slug="demo", codebase="./mycode", run="python go.py"),
167
+ PodConfig(gpu="A100"), # ModalConfig(gpu="A100") runs the same pipeline on Modal
168
+ ))
169
+ print(res.remote_exit, res.local_results)
170
+ ```
171
+
172
+ `run()` provisions → waits-functional → uploads the codebase (local dir *or* git
173
+ URL) → runs `setup` then `run` (tee'd to `results/run.log`) → pulls the results
174
+ dir back → optionally uploads to GCS → tears down → returns a `RunResult`. Pass
175
+ a `ModalConfig` instead of a `PodConfig` to run the exact same pipeline on a
176
+ Modal sandbox.
177
+
178
+ CLI equivalent — the same `--gpu` flag works on both backends (omit it for a
179
+ CPU box):
180
+
181
+ ```bash
182
+ bellhop run --slug demo --codebase ./mycode --run "python go.py" --gpu A100
183
+ bellhop run --backend modal --slug demo --codebase ./mycode --run "python go.py" --gpu A100
184
+ ```
185
+
186
+ ### Fan out a sweep
187
+
188
+ ```python
189
+ from dataclasses import replace
190
+ from bellhop import PodConfig, RunSpec, run_many
191
+
192
+ base = RunSpec(slug="sweep", codebase="./code", run="python train.py")
193
+ specs = [replace(base, slug=f"lr{lr}", run=f"python train.py --lr {lr}")
194
+ for lr in (1e-4, 3e-4, 1e-3)]
195
+ results = await run_many(specs, PodConfig(gpu="A100"), max_concurrency=4)  # call from inside an async function
196
+ ```
197
+
198
+ ## Cleanup: two layers
199
+
200
+ | When | Handled by |
201
+ |------|------------|
202
+ | Normal exit, exception, Ctrl-C | the `async with` block's `finally` — **always** tears the pod down (unless `keep=True`) |
203
+ | The host process itself dies (kill -9, crash, reboot) | native server-side safety timers — RunPod TTLs or Modal `timeout` / `idle_timeout` (below) |
204
+
205
+ The context manager is the primary guarantee and covers essentially everything.
206
+ The timers are a backstop for the one case `finally` can't reach.
207
+
208
+ ### Native safety timers
209
+
210
+ Every GPU pod is created with RunPod's own server-side timers, set atomically at
211
+ creation so they hold even if your process dies the instant after:
212
+
213
+ ```python
214
+ from datetime import timedelta
215
+ PodConfig(
216
+ stop_after=timedelta(hours=24), # halt compute billing; disk persists, restartable
217
+ terminate_after=timedelta(hours=72), # delete the pod; all billing stops
218
+ )
219
+ # set either to None to disable
220
+ ```
221
+
222
+ These use the GraphQL `podFindAndDeployOnDemand` mutation (REST has no TTL
223
+ field), so setting a timer routes pod creation through GraphQL automatically.
224
+
225
+ > **Granularity caveat.** RunPod enforces these on a coarse schedule, *not*
226
+ > minute-precise — a short timer may fire well after its deadline. Treat them as
227
+ > an hours-scale backstop, not a precise kill switch. The `async with` cleanup
228
+ > is what you should rely on for prompt teardown. Native TTL currently applies
229
+ > to GPU pods only (the on-demand path); CPU pods rely on `finally` alone.
230
+
231
+ On the **Modal** backend the equivalents are first-class `create` kwargs:
232
+ `ModalConfig(timeout=timedelta(hours=24))` is the hard max lifetime and
233
+ `idle_timeout=timedelta(minutes=30)` terminates the sandbox after inactivity —
234
+ no GraphQL detour, and they apply to CPU and GPU sandboxes alike.
235
+
236
+ The backend-agnostic spelling of the hard kill is
237
+ **`max_lifetime=timedelta(...)`** — set it on either config (or
238
+ `--max-lifetime-hours` on the CLI) and it maps to `terminate_after` on RunPod
239
+ and `timeout` on Modal, taking precedence over those fields.
240
+
241
+ ## Optional: persist results to GCS
242
+
243
+ Off by default. Pass `gcs_base` (or `--gcs-base`) to upload the pulled results
244
+ to Google Cloud Storage from your machine (credentials never touch the pod):
245
+
246
+ ```python
247
+ RunSpec(slug="demo", codebase="./code", run="python go.py",
248
+ gcs_base="gs://your-bucket/experiments")
249
+ # res.gcs_uri and res.retrieve_cmd are populated
250
+ ```
251
+
252
+ ## Typed errors
253
+
254
+ `BellhopError` subclasses let you branch on failure mode:
255
+ `PreflightError` (bad config / missing key / `modal` not installed),
256
+ `ProvisionError` (pod or sandbox create failed), `PodNotReadyError` (never became
257
+ functional), `RemoteJobError` (carries `.remote_exit` + `.log_tail`),
258
+ `ResultsMissingError`, `GcsUploadError`. (`RunpodError` is a back-compat alias
259
+ for `BellhopError`.)
260
+
261
+ ## Notes
262
+
263
+ - Code/result transfer is **tar-over-ssh** on RunPod and **tar-over-exec** on
264
+ Modal — only needs `tar` in the image (no rsync; on RunPod also `ssh`).
265
+ - Env vars passed to `exec(env=...)` never appear in the box's process list:
266
+ RunPod exports them inside a script fed over stdin; Modal passes them over its
267
+ API, not argv.
268
+ - On out-of-stock, a RunPod `COMMUNITY` request retries on `SECURE` automatically
269
+ (disable the retry with `cloud_fallback=False`).
270
+ - The Modal default image is `debian_slim` with `git` + `tar`; add packages with
271
+ `ModalConfig(pip=[...], apt=[...])`, or supply your own `modal.Image` /
272
+ registry ref (assumed to already have `tar`).
273
+
274
+ ## Development
275
+
276
+ ```bash
277
+ pip install -e ".[dev]"
278
+ pytest # offline unit tests (no pod/sandbox, no cost)
279
+ RUNPOD_LIVE=1 pytest tests/integration_live.py -s # billed RunPod end-to-end test
280
+ MODAL_LIVE=1 pytest tests/integration_modal.py -s # billed Modal end-to-end test
281
+ ```
282
+
283
+ ## License
284
+
285
+ MIT
@@ -0,0 +1,258 @@
1
+ # bellhop
2
+
3
+ **Check your code into an ephemeral box, run it, bring the results back, and
4
+ check out** — an async Python library for disposable compute. Two backends:
5
+ a [RunPod](https://runpod.io) pod or a [Modal](https://modal.com) sandbox.
6
+
7
+ Like a hotel bellhop: it books a room (provisions the box), waits until it's
8
+ actually ready, carries your luggage up (uploads your code), and when you leave
9
+ it brings your bags back down (pulls results) and checks out (tears the box
10
+ down) — so you never leave a box (or a bill) running by accident.
11
+
12
+ ```python
13
+ import asyncio
14
+ from bellhop import pod, PodConfig
15
+
16
+ async def main():
17
+ async with pod(PodConfig(gpu="RTX4090")) as p:
18
+ await p.push("./mycode", "/workspace/job")
19
+ r = await p.exec("cd /workspace/job && python train.py")
20
+ print(r.stdout)
21
+ await p.pull("/workspace/job/out", "./results")
22
+ # pod is gone here — even if the body raised
23
+
24
+ asyncio.run(main())
25
+ ```
26
+
27
+ The same code runs on Modal by swapping the config — `sandbox(ModalConfig(...))`
28
+ instead of `pod(PodConfig(...))` (see [Two backends](#two-backends) below).
29
+
30
+ The RunPod backend talks to the RunPod **REST API** (`rest.runpod.io/v1`)
31
+ directly over `httpx`, falling back to the **GraphQL API** only to set native
32
+ safety timers. No `runpodctl`, no vendored SDK. The Modal backend drives a
33
+ Modal **Sandbox** via the `modal` SDK.
34
+
35
+ ## Install
36
+
37
+ ```bash
38
+ pip install bellhop-py # RunPod backend (or: pip install git+https://github.com/dtch1997/bellhop)
39
+ pip install 'bellhop-py[modal]' # add the Modal backend
40
+ ```
41
+
42
+ (The PyPI distribution is `bellhop-py` — the bare `bellhop` name is an
43
+ unrelated package — but the import name and CLI are plain `bellhop`.)
44
+
45
+ For the **RunPod** backend, set `RUNPOD_API_KEY`. Connection uses your SSH
46
+ keypair (`~/.ssh/id_ed25519` by default): bellhop injects the public key as the
47
+ pod's `PUBLIC_KEY` env so `root@pod` is reachable. For the **Modal** backend,
48
+ configure Modal auth (`modal token new`, or `MODAL_TOKEN_ID` /
49
+ `MODAL_TOKEN_SECRET`). (GCS upload, if you enable it, needs `gcloud` on your
50
+ `PATH` either way.)
51
+
52
+ ## Two backends
53
+
54
+ Both backends implement the same `ExecBox` contract — `exec` / `push` / `pull`
55
+ / `exists_remote` / `teardown` — so the high-level `run()` / `run_many()`
56
+ pipeline (below) is provider-agnostic: hand it a `PodConfig` for RunPod or a
57
+ `ModalConfig` for Modal and everything else is identical.
58
+
59
+ ```python
60
+ from bellhop import sandbox, ModalConfig
61
+
62
+ async with sandbox(ModalConfig(gpu="A10G")) as b: # CPU box: omit gpu
63
+ await b.push("./mycode", "/workspace/job")
64
+ r = await b.exec("cd /workspace/job && python train.py")
65
+ print(r.stdout)
66
+ await b.pull("/workspace/job/out", "./results")
67
+ # sandbox terminated on exit (pass keep=True to leave it up)
68
+ ```
69
+
70
+ The whole common surface is spelled the same on both configs:
71
+
72
+ - **`gpu=`** — a canonical short name (`"A100"`, `"H100"`, `"L4"`, …); `None`
73
+ means a CPU box. On RunPod the name expands through `GPU_ALIASES` to the
74
+ *list* of matching gpuTypeIds (e.g. `"A100"` → PCIe *and* SXM), which the
75
+ REST API accepts wholesale — better stock availability than naming one SKU.
76
+ A full RunPod id (`"NVIDIA GeForce RTX 4090"`) still passes verbatim.
77
+ - **`max_lifetime=`** — the hard server-side kill switch, `timedelta` on both
78
+ (maps to `terminate_after` on RunPod, `timeout` on Modal).
79
+ - **`image=` / `image_preset=`** — the `pytorch-cuda` preset is pinned to the
80
+ same torch 2.4.0 + CUDA 12.4 environment on both backends.
81
+
82
+ What genuinely differs stays backend-specific:
83
+
84
+ | | RunPod (`PodConfig`, `pod()`) | Modal (`ModalConfig`, `sandbox()`) |
85
+ |---|---|---|
86
+ | Readiness | SSH/probe wait (below) | none — `create()` returns an execable box |
87
+ | Extra TTL | `stop_after` (wall-clock compute halt) | `idle_timeout` (kill after inactivity) |
88
+ | Image extras | — | `pip=` / `apt=`, `modal.Image`, `secrets=`, `volumes=` |
89
+ | Placement | `cloud=` SECURE/COMMUNITY (+fallback) | `region=`, `cpu=`, `memory=` |
90
+ | Auth | `RUNPOD_API_KEY` + SSH keypair | Modal token (`modal token new`) |
91
+
92
+ (`stop_after` and `idle_timeout` are deliberately *not* unified — one is a
93
+ wall-clock timer, the other an inactivity timer; pretending they're the same
94
+ concept would be a trap. `gpu_id=` remains as a legacy spelling of a verbatim
95
+ RunPod id.)
96
+
97
+ ## "Return when functional" — the hard part (RunPod only)
98
+
99
+ `desiredStatus == RUNNING` is necessary but **not sufficient**: sshd / your
100
+ server typically lags the RUNNING state by 30–60s. So once a pod is routable
101
+ (RUNNING + public IP + mapped port), bellhop runs a **readiness probe** until it
102
+ passes before handing the pod to you. "Functional" is caller-specific, so it's
103
+ pluggable:
104
+
105
+ ```python
106
+ from bellhop import SshProbe, TcpProbe, HttpProbe, LogMarkerProbe
107
+
108
+ PodConfig(..., ready=SshProbe("true")) # ssh job pods (default)
109
+ PodConfig(..., ready=HttpProbe(8000, "/health")) # a served endpoint
110
+ PodConfig(..., ready=LogMarkerProbe("server up")) # headless pods
111
+ ```
112
+
113
+ (Modal sandboxes are execable as soon as `create()` returns, so there's no
114
+ probe step on that backend.)
115
+
116
+ ## Two ways to use it
117
+
118
+ ### Composable pod — multi-step / interactive
119
+
120
+ Keep one pod alive and run many steps against it:
121
+
122
+ ```python
123
+ async with pod(PodConfig(gpu="RTX4090")) as p:
124
+ await p.push("./code", "/workspace/job")
125
+ await p.exec("cd /workspace/job && python train.py", env={"HF_TOKEN": tok})
126
+ await p.exec("python eval.py") # same pod, no re-provision
127
+ await p.pull("/workspace/job/results", "./out")
128
+ print(p.proxy_url(8000)) # https://<id>-8000.proxy.runpod.net
129
+ # torn down on exit (pass keep=True to leave it up)
130
+ ```
131
+
132
+ ### One-shot — provision, run, collect, done
133
+
134
+ ```python
135
+ import asyncio
136
+ from bellhop import run, RunSpec, PodConfig
137
+
138
+ res = asyncio.run(run(
139
+ RunSpec(slug="demo", codebase="./mycode", run="python go.py"),
140
+ PodConfig(gpu="A100"), # ModalConfig(gpu="A100") runs the same pipeline on Modal
141
+ ))
142
+ print(res.remote_exit, res.local_results)
143
+ ```
144
+
145
+ `run()` provisions → waits-functional → uploads the codebase (local dir *or* git
146
+ URL) → runs `setup` then `run` (tee'd to `results/run.log`) → pulls the results
147
+ dir back → optionally uploads to GCS → tears down → returns a `RunResult`. Pass
148
+ a `ModalConfig` instead of a `PodConfig` to run the exact same pipeline on a
149
+ Modal sandbox.
150
+
151
+ CLI equivalent — the same `--gpu` flag works on both backends (omit it for a
152
+ CPU box):
153
+
154
+ ```bash
155
+ bellhop run --slug demo --codebase ./mycode --run "python go.py" --gpu A100
156
+ bellhop run --backend modal --slug demo --codebase ./mycode --run "python go.py" --gpu A100
157
+ ```
158
+
159
+ ### Fan out a sweep
160
+
161
+ ```python
162
+ from dataclasses import replace
163
+ from bellhop import PodConfig, RunSpec, run_many
164
+
165
+ base = RunSpec(slug="sweep", codebase="./code", run="python train.py")
166
+ specs = [replace(base, slug=f"lr{lr}", run=f"python train.py --lr {lr}")
167
+ for lr in (1e-4, 3e-4, 1e-3)]
168
+ results = await run_many(specs, PodConfig(gpu="A100"), max_concurrency=4)  # call from inside an async function
169
+ ```
170
+
171
+ ## Cleanup: two layers
172
+
173
+ | When | Handled by |
174
+ |------|------------|
175
+ | Normal exit, exception, Ctrl-C | the `async with` block's `finally` — **always** tears the pod down (unless `keep=True`) |
176
+ | The host process itself dies (kill -9, crash, reboot) | native server-side safety timers — RunPod TTLs or Modal `timeout` / `idle_timeout` (below) |
177
+
178
+ The context manager is the primary guarantee and covers essentially everything.
179
+ The timers are a backstop for the one case `finally` can't reach.
180
+
181
+ ### Native safety timers
182
+
183
+ Every GPU pod is created with RunPod's own server-side timers, set atomically at
184
+ creation so they hold even if your process dies the instant after:
185
+
186
+ ```python
187
+ from datetime import timedelta
188
+ PodConfig(
189
+ stop_after=timedelta(hours=24), # halt compute billing; disk persists, restartable
190
+ terminate_after=timedelta(hours=72), # delete the pod; all billing stops
191
+ )
192
+ # set either to None to disable
193
+ ```
194
+
195
+ These use the GraphQL `podFindAndDeployOnDemand` mutation (REST has no TTL
196
+ field), so setting a timer routes pod creation through GraphQL automatically.
197
+
198
+ > **Granularity caveat.** RunPod enforces these on a coarse schedule, *not*
199
+ > minute-precise — a short timer may fire well after its deadline. Treat them as
200
+ > an hours-scale backstop, not a precise kill switch. The `async with` cleanup
201
+ > is what you should rely on for prompt teardown. Native TTL currently applies
202
+ > to GPU pods only (the on-demand path); CPU pods rely on `finally` alone.
203
+
204
+ On the **Modal** backend the equivalents are first-class `create` kwargs:
205
+ `ModalConfig(timeout=timedelta(hours=24))` is the hard max lifetime and
206
+ `idle_timeout=timedelta(minutes=30)` terminates the sandbox after inactivity —
207
+ no GraphQL detour, and they apply to CPU and GPU sandboxes alike.
208
+
209
+ The backend-agnostic spelling of the hard kill is
210
+ **`max_lifetime=timedelta(...)`** — set it on either config (or
211
+ `--max-lifetime-hours` on the CLI) and it maps to `terminate_after` on RunPod
212
+ and `timeout` on Modal, taking precedence over those fields.
213
+
214
+ ## Optional: persist results to GCS
215
+
216
+ Off by default. Pass `gcs_base` (or `--gcs-base`) to upload the pulled results
217
+ to Google Cloud Storage from your machine (credentials never touch the pod):
218
+
219
+ ```python
220
+ RunSpec(slug="demo", codebase="./code", run="python go.py",
221
+ gcs_base="gs://your-bucket/experiments")
222
+ # res.gcs_uri and res.retrieve_cmd are populated
223
+ ```
224
+
225
+ ## Typed errors
226
+
227
+ `BellhopError` subclasses let you branch on failure mode:
228
+ `PreflightError` (bad config / missing key / `modal` not installed),
229
+ `ProvisionError` (pod or sandbox create failed), `PodNotReadyError` (never became
230
+ functional), `RemoteJobError` (carries `.remote_exit` + `.log_tail`),
231
+ `ResultsMissingError`, `GcsUploadError`. (`RunpodError` is a back-compat alias
232
+ for `BellhopError`.)
233
+
234
+ ## Notes
235
+
236
+ - Code/result transfer is **tar-over-ssh** on RunPod and **tar-over-exec** on
237
+ Modal — only needs `tar` in the image (no rsync; on RunPod also `ssh`).
238
+ - Env vars passed to `exec(env=...)` never appear in the box's process list:
239
+ RunPod exports them inside a script fed over stdin; Modal passes them over its
240
+ API, not argv.
241
+ - On out-of-stock, a RunPod `COMMUNITY` request retries on `SECURE` automatically
242
+ (disable the retry with `cloud_fallback=False`).
243
+ - The Modal default image is `debian_slim` with `git` + `tar`; add packages with
244
+ `ModalConfig(pip=[...], apt=[...])`, or supply your own `modal.Image` /
245
+ registry ref (assumed to already have `tar`).
246
+
247
+ ## Development
248
+
249
+ ```bash
250
+ pip install -e ".[dev]"
251
+ pytest # offline unit tests (no pod/sandbox, no cost)
252
+ RUNPOD_LIVE=1 pytest tests/integration_live.py -s # billed RunPod end-to-end test
253
+ MODAL_LIVE=1 pytest tests/integration_modal.py -s # billed Modal end-to-end test
254
+ ```
255
+
256
+ ## License
257
+
258
+ MIT
@@ -0,0 +1,16 @@
1
+ import os
2
+ import pathlib
3
+ import subprocess
4
+
5
+ pathlib.Path("results").mkdir(exist_ok=True)  # smoke-test job: every output file is written under ./results (relative to the cwd)
6
+ secret = os.environ.get("MY_SECRET", "<MISSING>")  # "<MISSING>" marks that the variable was not set for this process
7
+ msg = f"job ran on pod; MY_SECRET={secret}"  # NOTE: embeds the variable's value in clear text
8
+ print(msg)
9
+ pathlib.Path("results/out.txt").write_text(msg + "\n")  # same message, persisted as a result file
10
+
11
+ try:  # capture the `nvidia-smi -L` output; a failure is recorded as text instead of aborting the script
12
+ g = subprocess.check_output(["nvidia-smi", "-L"], text=True).strip()
13
+ except Exception as e:
14
+ g = f"no nvidia-smi: {e}"  # any exception (e.g. missing binary or non-zero exit) ends up here
15
+ print("GPU:", g)
16
+ pathlib.Path("results/gpu.txt").write_text(g + "\n")  # written in both the success and the failure case
@@ -0,0 +1,42 @@
1
+ [project]
2
+ # Distribution name is bellhop-py ("bellhop" is squatted on PyPI by an
3
+ # unrelated 2022 package); the import name and CLI remain `bellhop`.
4
+ name = "bellhop-py"
5
+ version = "0.3.0"
6
+ description = "Async Python library that checks your code into an ephemeral box (RunPod pod or Modal sandbox), runs it, brings the results back, and checks out."
7
+ readme = "README.md"
8
+ license = "MIT"
9
+ authors = [{ name = "Daniel Tan" }]
10
+ requires-python = ">=3.10"
11
+ dependencies = ["httpx>=0.27"]
12
+ keywords = ["runpod", "modal", "gpu", "ephemeral", "compute", "sandbox"]
13
+ classifiers = [
14
+ "Development Status :: 4 - Beta",
15
+ "Framework :: AsyncIO",
16
+ "Intended Audience :: Developers",
17
+ "Programming Language :: Python :: 3",
18
+ "Topic :: Software Development :: Libraries",
19
+ "Topic :: System :: Distributed Computing",
20
+ ]
21
+
22
+ [project.urls]
23
+ Homepage = "https://github.com/dtch1997/bellhop"
24
+ Repository = "https://github.com/dtch1997/bellhop"
25
+ Issues = "https://github.com/dtch1997/bellhop/issues"
26
+
27
+ [project.scripts]
28
+ bellhop = "bellhop.cli:main"
29
+
30
+ [project.optional-dependencies]
31
+ modal = ["modal>=1.0"]
32
+ dev = ["pytest>=8", "pytest-asyncio>=0.23", "modal>=1.0"]
33
+
34
+ [build-system]
35
+ requires = ["hatchling"]
36
+ build-backend = "hatchling.build"
37
+
38
+ [tool.hatch.build.targets.wheel]
39
+ packages = ["src/bellhop"]
40
+
41
+ [tool.pytest.ini_options]
42
+ asyncio_mode = "auto"
@@ -0,0 +1,34 @@
1
+ """bellhop: check your code into an ephemeral box (RunPod pod or Modal sandbox), run it, bring results back, check out."""
2
+
3
+ from .backend import ExecBox, ExecResult, open_box
4
+ from .errors import (
5
+ BellhopError,
6
+ GcsUploadError,
7
+ PodNotReadyError,
8
+ PreflightError,
9
+ ProvisionError,
10
+ RemoteJobError,
11
+ ResultsMissingError,
12
+ RunpodError,
13
+ )
14
+ from .graphql import RunpodGraphQL
15
+ from .modal_box import ModalConfig, Sandbox, sandbox
16
+ from .pod import GPU_ALIASES, IMAGE_PRESETS, Pod, PodConfig, pod
17
+ from .probes import HttpProbe, LogMarkerProbe, ReadyProbe, SshProbe, TcpProbe
18
+ from .rest import RunpodRest
19
+ from .run import RunResult, RunSpec, run, run_many
20
+
21
+ __all__ = [  # names re-exported from the submodules imported in this file
22
+ # backend-agnostic surface: the run pipeline plus the shared box types
23
+ "run", "run_many", "RunSpec", "RunResult",
24
+ "open_box", "ExecBox", "ExecResult",
25
+ # RunPod backend: pod entry point and config, API clients, readiness probes
26
+ "pod", "Pod", "PodConfig", "IMAGE_PRESETS", "GPU_ALIASES",
27
+ "RunpodRest", "RunpodGraphQL",
28
+ "ReadyProbe", "SshProbe", "TcpProbe", "HttpProbe", "LogMarkerProbe",
29
+ # Modal backend: sandbox entry point and config
30
+ "sandbox", "Sandbox", "ModalConfig",
31
+ # errors (the README describes RunpodError as a back-compat alias for BellhopError)
32
+ "BellhopError", "RunpodError", "PreflightError", "ProvisionError", "PodNotReadyError",
33
+ "RemoteJobError", "ResultsMissingError", "GcsUploadError",
34
+ ]