gpu-dev 0.5.3__tar.gz → 0.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/PKG-INFO +1 -1
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/PKG-INFO +1 -1
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/interactive.py +16 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/pyproject.toml +1 -1
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda.tf +1 -1
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/.github/workflows/no-gitlinks.yml +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/.github/workflows/publish.yml +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/.gitignore +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/CLAUDE.md +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/PROGRESS.md +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/PR_DESCRIPTION.md +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/TODO.md +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/admin/README.md +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/admin/generate_stats.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/admin/requirements.txt +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/README.md +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/ZERO_CONFIG_SETUP.md +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/SOURCES.txt +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/dependency_links.txt +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/entry_points.txt +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/requires.txt +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/top_level.txt +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/__init__.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/auth.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/cli.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/config.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/disks.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/name_generator.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/reservations.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/ssh_proxy.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/gpu-dev-cli/minimal-iam-policy.json +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/cli-tools/scripts/clear_stale_disk_locks.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/docs/USER_GUIDE.md +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/docs/devgpu-features.html +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/docs/docker-mark-blue.svg +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/docs/icons8-cursor-ai.svg +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/post.md +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/setup.cfg +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/.claude/skills/deploy.md +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/.terraform.lock.hcl +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/README.md +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/alb.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/availability.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/backend.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/.dockerignore +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/Dockerfile +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/backup-dotfiles +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/bash_profile +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/bashrc +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/bashrc_ext +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/build-with-efa.sh +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/dotfiles-shutdown-handler +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/list-dotfile-versions +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/motd_script +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/nproc_wrapper +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/profile +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/restore-dotfiles +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/restore-dotfiles-version +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/setup-dotfiles-persistence +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/shell_env +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/ssh_config +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/zprofile +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/zshrc +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker/zshrc_ext +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker-build.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker-example/Dockerfile +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/docker-example/hello.txt +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/ecr.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/efs.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/eks.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/expiry.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/git-cache.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/kubernetes.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/availability_updater/index.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/availability_updater/requirements.txt +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/reservation_expiry/index.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/reservation_processor/buildkit_job.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/reservation_processor/index.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/reservation_processor/requirements.txt +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/shared/__init__.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/shared/alb_utils.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/shared/dns_utils.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/shared/k8s_client.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/shared/requirements.txt +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/shared/snapshot_utils.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/main.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py.bak +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/migrations/check_snapshots.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/migrations/run_backfill.sh +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/monitoring.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/outputs.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/pyproject.toml +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/queue.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/route53.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/s3-disk-contents.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/scripts/CLEANUP_GUIDE.md +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/scripts/detect_empty_volumes.sh +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/scripts/ec2_avail_probe.sh +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/scripts/inspect_user_data.sh +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/ssh-proxy/Dockerfile +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/ssh-proxy/proxy.py +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/ssh-proxy/requirements.txt +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/ssh-proxy-service.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/ssh-proxy.tf +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/switch-to.sh +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/templates/al2023-cpu-user-data.sh +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/templates/al2023-user-data.sh +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/templates/user-data-self-managed.sh +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/templates/user-data.sh +0 -0
- {gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/variables.tf +0 -0
|
@@ -189,6 +189,12 @@ def select_gpu_count_interactive(
|
|
|
189
189
|
# Add multinode options
|
|
190
190
|
multinode_counts = [16, 24, 32, 40, 48] # multiples of 8
|
|
191
191
|
|
|
192
|
+
# Pull live availability for the parent SKU once — used to annotate every option.
|
|
193
|
+
parent_info = (availability_info or {}).get(gpu_type, {}) if availability_info else {}
|
|
194
|
+
parent_max_reservable = int(parent_info.get("max_reservable", 0))
|
|
195
|
+
parent_full_nodes = int(parent_info.get("full_nodes_available", 0))
|
|
196
|
+
parent_available = int(parent_info.get("available", 0))
|
|
197
|
+
|
|
192
198
|
# MIG slice submenu: only for h100. Each tuple is (target_gpu_type, gpu_count, gb_label).
|
|
193
199
|
mig_options = []
|
|
194
200
|
if gpu_type == "h100":
|
|
@@ -237,6 +243,11 @@ def select_gpu_count_interactive(
|
|
|
237
243
|
label = f"1 GPU (single node)"
|
|
238
244
|
else:
|
|
239
245
|
label = f"{count} GPUs (single node)"
|
|
246
|
+
if parent_info:
|
|
247
|
+
if parent_max_reservable >= count:
|
|
248
|
+
label += f" [{parent_available} free]"
|
|
249
|
+
else:
|
|
250
|
+
label += " [unavailable now]"
|
|
240
251
|
choices.append(questionary.Choice(title=label, value=count))
|
|
241
252
|
|
|
242
253
|
# Multinode at the bottom.
|
|
@@ -246,6 +257,11 @@ def select_gpu_count_interactive(
|
|
|
246
257
|
for count in multinode_counts:
|
|
247
258
|
nodes = count // max_gpus
|
|
248
259
|
label = f"{count} GPUs ({nodes} nodes × {max_gpus} GPUs)"
|
|
260
|
+
if parent_info:
|
|
261
|
+
if parent_max_reservable >= count:
|
|
262
|
+
label += f" [{parent_full_nodes} full nodes free]"
|
|
263
|
+
else:
|
|
264
|
+
label += " [unavailable now]"
|
|
249
265
|
choices.append(questionary.Choice(title=label, value=count))
|
|
250
266
|
|
|
251
267
|
try:
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "gpu-dev"
|
|
7
|
-
version = "0.5.3"
|
|
7
|
+
version = "0.5.4"
|
|
8
8
|
description = "CLI tool for PyTorch GPU developer server reservations"
|
|
9
9
|
authors = [{name = "PyTorch Team"}]
|
|
10
10
|
readme = "cli-tools/gpu-dev-cli/README.md"
|
|
@@ -180,7 +180,7 @@ resource "aws_lambda_function" "reservation_processor" {
|
|
|
180
180
|
HOSTED_ZONE_ID = local.effective_domain_name != "" ? local.hosted_zone_id : ""
|
|
181
181
|
SSH_DOMAIN_MAPPINGS_TABLE = local.effective_domain_name != "" ? aws_dynamodb_table.ssh_domain_mappings.name : ""
|
|
182
182
|
SSL_CERTIFICATE_ARN = local.effective_domain_name != "" ? aws_acm_certificate.wildcard[0].arn : ""
|
|
183
|
-
LAMBDA_VERSION = "0.5.3"
|
|
183
|
+
LAMBDA_VERSION = "0.5.4"
|
|
184
184
|
MIN_CLI_VERSION = "0.5.2"
|
|
185
185
|
DISK_CONTENTS_BUCKET = aws_s3_bucket.disk_contents.bucket
|
|
186
186
|
OPERATIONS_TABLE = aws_dynamodb_table.operations.name
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/availability_updater/index.py
RENAMED
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py
RENAMED
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt
RENAMED
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/reservation_processor/index.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.3 → gpu_dev-0.5.4}/terraform-gpu-devservers/templates/user-data-self-managed.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|