gpu-gate 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ name: ci
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ fail-fast: false
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12"]
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - name: Install uv
18
+ uses: astral-sh/setup-uv@v5 # v3 does not accept the `python-version` input, so the matrix value was ignored
19
+ with:
20
+ python-version: ${{ matrix.python-version }}
21
+ - name: Sync dependencies
22
+ run: uv sync --all-extras --dev
23
+ - name: Lint
24
+ run: uv run ruff check .
25
+ - name: Format check
26
+ run: uv run ruff format --check .
27
+ - name: Test
28
+ run: uv run pytest --cov --cov-report=term-missing
@@ -0,0 +1,21 @@
1
+ name: publish
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ pypi:
10
+ runs-on: ubuntu-latest
11
+ environment: pypi
12
+ permissions:
13
+ id-token: write
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ - name: Install uv
17
+ uses: astral-sh/setup-uv@v3
18
+ - name: Build
19
+ run: uv build
20
+ - name: Publish to PyPI
21
+ run: uv publish
@@ -0,0 +1,26 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+
9
+ # Virtual environments
10
+ .venv/
11
+ venv/
12
+
13
+ # uv
14
+ uv.lock
15
+
16
+ # Test and coverage
17
+ .pytest_cache/
18
+ .coverage
19
+ .coverage.*
20
+ htmlcov/
21
+ .ruff_cache/
22
+
23
+ # Editor / OS
24
+ .vscode/
25
+ .idea/
26
+ .DS_Store
@@ -0,0 +1,15 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.6.9
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix]
7
+ - id: ruff-format
8
+ - repo: https://github.com/pre-commit/pre-commit-hooks
9
+ rev: v4.6.0
10
+ hooks:
11
+ - id: end-of-file-fixer
12
+ - id: trailing-whitespace
13
+ - id: check-yaml
14
+ - id: check-toml
15
+ - id: check-merge-conflict
@@ -0,0 +1,30 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format is based
4
+ on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project
5
+ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [0.2.0] - 2026-05-10
8
+
9
+ ### Added
10
+ - Docker image and a published container entry point.
11
+ - Continuous integration across Python 3.10, 3.11 and 3.12 (lint, format
12
+ check and tests).
13
+ - Expanded documentation and usage examples.
14
+
15
+ ### Changed
16
+ - Hardened packaging metadata and pinned the supported Python versions.
17
+
18
+ ## [0.1.0] - 2026-05-07
19
+
20
+ ### Added
21
+ - `run` command: wait for a free GPU, claim it with a cooperative lock, set
22
+ `CUDA_VISIBLE_DEVICES`, and exec the wrapped command.
23
+ - `wait` command: block until a GPU is free and print the chosen index.
24
+ - `status` command: list visible GPUs with free memory and utilization, with
25
+ optional JSON output.
26
+ - Filters for memory, utilization and explicit index include/exclude.
27
+ - Advisory per-device file locks to avoid two runs grabbing the same card.
28
+
29
+ [0.2.0]: https://github.com/jmweb-org/gpu-gate/releases/tag/v0.2.0
30
+ [0.1.0]: https://github.com/jmweb-org/gpu-gate/releases/tag/v0.1.0
@@ -0,0 +1,20 @@
1
+ # gpu-gate reads GPU state from the host's NVIDIA driver via NVML. Build a small image and
2
+ # run it with `--gpus all` (requires the NVIDIA Container Toolkit) so the container can see the cards:
3
+ #
4
+ # docker build -t gpu-gate .
5
+ # docker run --rm --gpus all gpu-gate status
6
+ #
7
+ FROM python:3.12-slim
8
+
9
+ LABEL org.opencontainers.image.source="https://github.com/jmweb-org/gpu-gate"
10
+ LABEL org.opencontainers.image.description="Wait for a free GPU, claim it, and run a command on it."
11
+ LABEL org.opencontainers.image.licenses="MIT"
12
+
13
+ WORKDIR /app
14
+ COPY pyproject.toml README.md LICENSE ./
15
+ COPY src ./src
16
+
17
+ RUN pip install --no-cache-dir .
18
+
19
+ ENTRYPOINT ["gpu-gate"]
20
+ CMD ["--help"]
gpu_gate-0.2.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 José del Río
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.4
2
+ Name: gpu-gate
3
+ Version: 0.2.0
4
+ Summary: Wait for a free GPU, claim it, and run a command on it.
5
+ Project-URL: Homepage, https://github.com/jmweb-org/gpu-gate
6
+ Project-URL: Repository, https://github.com/jmweb-org/gpu-gate
7
+ Project-URL: Issues, https://github.com/jmweb-org/gpu-gate/issues
8
+ Author: José del Río
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 José del Río
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: cli,cuda,gpu,nvidia,nvml,scheduler
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Environment :: GPU :: NVIDIA CUDA
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Operating System :: POSIX :: Linux
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Topic :: System :: Distributed Computing
41
+ Classifier: Topic :: Utilities
42
+ Requires-Python: >=3.10
43
+ Requires-Dist: filelock>=3.12
44
+ Requires-Dist: nvidia-ml-py>=12.535
45
+ Requires-Dist: rich>=13.0
46
+ Requires-Dist: typer>=0.12
47
+ Description-Content-Type: text/markdown
48
+
49
+ # gpu-gate
50
+
51
+ [![CI](https://github.com/jmweb-org/gpu-gate/actions/workflows/ci.yml/badge.svg)](https://github.com/jmweb-org/gpu-gate/actions/workflows/ci.yml)
52
+ [![PyPI](https://img.shields.io/pypi/v/gpu-gate.svg)](https://pypi.org/project/gpu-gate/)
53
+ [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org)
54
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/jmweb-org/gpu-gate/blob/main/LICENSE)
55
+
56
+ Wait for a free GPU, claim it, set `CUDA_VISIBLE_DEVICES`, and run your command.
57
+
58
+ On a shared multi-GPU box without a cluster scheduler, starting a job usually
59
+ means watching `nvidia-smi`, picking a card by hand, exporting the env var, and
60
+ remembering to actually launch. `gpu-gate` is the small wait-pick-export-run
61
+ loop that does this for you, with a cooperative lock so two invocations on the
62
+ same host do not grab the same just-freed card. No daemon, no server, nothing
63
+ to administer.
64
+
65
+ ```console
66
+ $ gpu-gate run --min-free-mb 8000 -- python train.py
67
+ gpu-gate: waiting for a free GPU ...
68
+ # ... blocks until a card has >= 8 GB free, then runs train.py with
69
+ # CUDA_VISIBLE_DEVICES set to the chosen index
70
+ ```
71
+
72
+ ## Install
73
+
74
+ ```console
75
+ $ pip install gpu-gate # latest release from PyPI
75
+ $ pip install git+https://github.com/jmweb-org/gpu-gate # development version from GitHub
77
+ ```
78
+
79
+ It requires an NVIDIA driver at run time. The NVML binding
80
+ (`nvidia-ml-py`) is pulled in automatically; the package still installs and
81
+ imports on machines without a GPU, so it is safe to add to shared requirements.
82
+
83
+ ## Usage
84
+
85
+ ### Run a command on a free GPU
86
+
87
+ ```console
88
+ $ gpu-gate run -n 1 --min-free-mb 8000 -- python train.py --epochs 50
89
+ ```
90
+
91
+ Everything after `--` is the command. `gpu-gate` blocks until the requirements
92
+ are met, claims the chosen device(s), exports `CUDA_VISIBLE_DEVICES`, and execs
93
+ the command. Its own exit code is the command's exit code, so it drops cleanly
94
+ into scripts and CI.
95
+
96
+ Common options:
97
+
98
+ | Option | Meaning |
99
+ | --- | --- |
100
+ | `-n, --count` | Number of GPUs to claim (default 1) |
101
+ | `--min-free-mb` | Require at least this much free memory |
102
+ | `--max-util` | Skip cards busier than this percent |
103
+ | `--only 0,1` | Restrict the search to these indices |
104
+ | `--exclude 2,3` | Never pick these indices |
105
+ | `--poll` | Seconds between checks (default 5) |
106
+ | `--timeout` | Give up after N seconds (exit 124) |
107
+
108
+ ### Just wait, then use the result yourself
109
+
110
+ ```console
111
+ $ export CUDA_VISIBLE_DEVICES=$(gpu-gate wait --min-free-mb 8000)
112
+ ```
113
+
114
+ ### Inspect the current state
115
+
116
+ ```console
117
+ $ gpu-gate status
118
+ idx name free total util
119
+ 0 NVIDIA L40S 44211 MiB 46068 MiB 3%
120
+ 1 NVIDIA L40S 812 MiB 46068 MiB 97%
121
+
122
+ $ gpu-gate status --json
123
+ ```
124
+
125
+ ## Exit codes
126
+
127
+ | Code | Meaning |
128
+ | --- | --- |
129
+ | 0 | Command ran and exited successfully (a non-zero exit code from the command is forwarded unchanged) |
130
+ | 2 | Bad invocation (for example, no command after `--`) |
131
+ | 124 | Timed out waiting for a GPU |
132
+ | 3 | Requirements could never be met |
133
+ | 4 | Could not read GPU state (no driver / NVML error) |
134
+
135
+ ## How selection works
136
+
137
+ A GPU is eligible when it has enough free memory, is below the utilization
138
+ ceiling, is not excluded, and is not currently locked by another `gpu-gate`
139
+ caller. Eligible cards are ranked by most free memory, then lowest
140
+ utilization, then index, and the top `--count` are chosen. The ordering is
141
+ fully deterministic.
142
+
143
+ ## Locking
144
+
145
+ While a command runs, `gpu-gate` holds an advisory file lock per claimed
146
+ device under `$GPU_GATE_LOCK_DIR` (a per-user directory by default). Other
147
+ `gpu-gate` invocations skip locked devices, which avoids the classic race where
148
+ two jobs both see the same card free at the same instant. The lock is advisory:
149
+ it coordinates `gpu-gate` callers, not arbitrary CUDA programs.
150
+
151
+ ## License
152
+
153
+ MIT. See [LICENSE](https://github.com/jmweb-org/gpu-gate/blob/main/LICENSE).
@@ -0,0 +1,105 @@
1
+ # gpu-gate
2
+
3
+ [![CI](https://github.com/jmweb-org/gpu-gate/actions/workflows/ci.yml/badge.svg)](https://github.com/jmweb-org/gpu-gate/actions/workflows/ci.yml)
4
+ [![PyPI](https://img.shields.io/pypi/v/gpu-gate.svg)](https://pypi.org/project/gpu-gate/)
5
+ [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org)
6
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/jmweb-org/gpu-gate/blob/main/LICENSE)
7
+
8
+ Wait for a free GPU, claim it, set `CUDA_VISIBLE_DEVICES`, and run your command.
9
+
10
+ On a shared multi-GPU box without a cluster scheduler, starting a job usually
11
+ means watching `nvidia-smi`, picking a card by hand, exporting the env var, and
12
+ remembering to actually launch. `gpu-gate` is the small wait-pick-export-run
13
+ loop that does this for you, with a cooperative lock so two invocations on the
14
+ same host do not grab the same just-freed card. No daemon, no server, nothing
15
+ to administer.
16
+
17
+ ```console
18
+ $ gpu-gate run --min-free-mb 8000 -- python train.py
19
+ gpu-gate: waiting for a free GPU ...
20
+ # ... blocks until a card has >= 8 GB free, then runs train.py with
21
+ # CUDA_VISIBLE_DEVICES set to the chosen index
22
+ ```
23
+
24
+ ## Install
25
+
26
+ ```console
27
+ $ pip install gpu-gate # latest release from PyPI
28
+ $ pip install git+https://github.com/jmweb-org/gpu-gate # development version from GitHub
29
+ ```
30
+
31
+ It requires an NVIDIA driver at run time. The NVML binding
32
+ (`nvidia-ml-py`) is pulled in automatically; the package still installs and
33
+ imports on machines without a GPU, so it is safe to add to shared requirements.
34
+
35
+ ## Usage
36
+
37
+ ### Run a command on a free GPU
38
+
39
+ ```console
40
+ $ gpu-gate run -n 1 --min-free-mb 8000 -- python train.py --epochs 50
41
+ ```
42
+
43
+ Everything after `--` is the command. `gpu-gate` blocks until the requirements
44
+ are met, claims the chosen device(s), exports `CUDA_VISIBLE_DEVICES`, and execs
45
+ the command. Its own exit code is the command's exit code, so it drops cleanly
46
+ into scripts and CI.
47
+
48
+ Common options:
49
+
50
+ | Option | Meaning |
51
+ | --- | --- |
52
+ | `-n, --count` | Number of GPUs to claim (default 1) |
53
+ | `--min-free-mb` | Require at least this much free memory |
54
+ | `--max-util` | Skip cards busier than this percent |
55
+ | `--only 0,1` | Restrict the search to these indices |
56
+ | `--exclude 2,3` | Never pick these indices |
57
+ | `--poll` | Seconds between checks (default 5) |
58
+ | `--timeout` | Give up after N seconds (exit 124) |
59
+
60
+ ### Just wait, then use the result yourself
61
+
62
+ ```console
63
+ $ export CUDA_VISIBLE_DEVICES=$(gpu-gate wait --min-free-mb 8000)
64
+ ```
65
+
66
+ ### Inspect the current state
67
+
68
+ ```console
69
+ $ gpu-gate status
70
+ idx name free total util
71
+ 0 NVIDIA L40S 44211 MiB 46068 MiB 3%
72
+ 1 NVIDIA L40S 812 MiB 46068 MiB 97%
73
+
74
+ $ gpu-gate status --json
75
+ ```
76
+
77
+ ## Exit codes
78
+
79
+ | Code | Meaning |
80
+ | --- | --- |
81
+ | 0 | Command ran and exited successfully (a non-zero exit code from the command is forwarded unchanged) |
82
+ | 2 | Bad invocation (for example, no command after `--`) |
83
+ | 124 | Timed out waiting for a GPU |
84
+ | 3 | Requirements could never be met |
85
+ | 4 | Could not read GPU state (no driver / NVML error) |
86
+
87
+ ## How selection works
88
+
89
+ A GPU is eligible when it has enough free memory, is below the utilization
90
+ ceiling, is not excluded, and is not currently locked by another `gpu-gate`
91
+ caller. Eligible cards are ranked by most free memory, then lowest
92
+ utilization, then index, and the top `--count` are chosen. The ordering is
93
+ fully deterministic.
94
+
95
+ ## Locking
96
+
97
+ While a command runs, `gpu-gate` holds an advisory file lock per claimed
98
+ device under `$GPU_GATE_LOCK_DIR` (a per-user directory by default). Other
99
+ `gpu-gate` invocations skip locked devices, which avoids the classic race where
100
+ two jobs both see the same card free at the same instant. The lock is advisory:
101
+ it coordinates `gpu-gate` callers, not arbitrary CUDA programs.
102
+
103
+ ## License
104
+
105
+ MIT. See [LICENSE](https://github.com/jmweb-org/gpu-gate/blob/main/LICENSE).
@@ -0,0 +1,69 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "gpu-gate"
7
+ version = "0.2.0"
8
+ description = "Wait for a free GPU, claim it, and run a command on it."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { file = "LICENSE" }
12
+ authors = [{ name = "José del Río" }]
13
+ keywords = ["gpu", "cuda", "nvidia", "nvml", "scheduler", "cli"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Environment :: GPU :: NVIDIA CUDA",
17
+ "Intended Audience :: Developers",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Operating System :: POSIX :: Linux",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: System :: Distributed Computing",
24
+ "Topic :: Utilities",
25
+ ]
26
+ dependencies = [
27
+ "typer>=0.12",
28
+ "rich>=13.0",
29
+ "filelock>=3.12",
30
+ "nvidia-ml-py>=12.535",
31
+ ]
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/jmweb-org/gpu-gate"
35
+ Repository = "https://github.com/jmweb-org/gpu-gate"
36
+ Issues = "https://github.com/jmweb-org/gpu-gate/issues"
37
+
38
+ [project.scripts]
39
+ gpu-gate = "gpu_gate.cli:entrypoint"
40
+
41
+ [dependency-groups]
42
+ dev = [
43
+ "pytest>=8.0",
44
+ "pytest-cov>=5.0",
45
+ "ruff>=0.6",
46
+ ]
47
+
48
+ [tool.hatch.build.targets.wheel]
49
+ packages = ["src/gpu_gate"]
50
+
51
+ [tool.pytest.ini_options]
52
+ addopts = "-q"
53
+ testpaths = ["tests"]
54
+ pythonpath = ["."]
55
+
56
+ [tool.ruff]
57
+ line-length = 100
58
+ target-version = "py310"
59
+ src = ["src", "tests"]
60
+
61
+ [tool.ruff.lint]
62
+ select = ["E", "F", "I", "UP", "B", "S", "C4", "RUF"]
63
+
64
+ [tool.ruff.lint.per-file-ignores]
65
+ "tests/*" = ["S101"]
66
+
67
+ [tool.coverage.run]
68
+ source = ["gpu_gate"]
69
+ branch = true
@@ -0,0 +1,15 @@
1
+ """gpu-gate: wait for a free GPU, claim it, and run a command on it."""
2
+
3
+ from gpu_gate.models import GpuStatus, Requirements, Selection
4
+ from gpu_gate.selector import NotEnoughGPUs, select
5
+
6
+ __version__ = "0.2.0"
7
+
8
+ __all__ = [
9
+ "GpuStatus",
10
+ "NotEnoughGPUs",
11
+ "Requirements",
12
+ "Selection",
13
+ "__version__",
14
+ "select",
15
+ ]
@@ -0,0 +1,4 @@
1
+ from gpu_gate.cli import entrypoint
2
+
3
+ if __name__ == "__main__":
4
+ entrypoint()