gpu-dev 0.7.2__tar.gz → 0.7.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/PKG-INFO +1 -1
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/interactive.py +97 -157
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/gpu_dev.egg-info/PKG-INFO +1 -1
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/pyproject.toml +1 -1
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/__init__.py +1 -1
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/.github/workflows/no-gitlinks.yml +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/.github/workflows/publish.yml +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/.gitignore +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/CLAUDE.md +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/README.md +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/admin/README.md +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/admin/generate_stats.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/admin/requirements.txt +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/gpu-dev-cli/README.md +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/gpu-dev-cli/ZERO_CONFIG_SETUP.md +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/__init__.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/auth.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/cli.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/config.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/disks.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/name_generator.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/reservations.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/gpu-dev-cli/gpu_dev_cli/ssh_proxy.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/gpu-dev-cli/minimal-iam-policy.json +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/cli-tools/scripts/clear_stale_disk_locks.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/docs/SDK_REPRO.md +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/docs/USER_GUIDE.md +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/docs/devgpu-features.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/docs/docker-mark-blue.svg +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/docs/icons8-cursor-ai.svg +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/gpu_dev.egg-info/SOURCES.txt +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/gpu_dev.egg-info/dependency_links.txt +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/gpu_dev.egg-info/entry_points.txt +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/gpu_dev.egg-info/requires.txt +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/gpu_dev.egg-info/top_level.txt +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/architecture.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/cli-demo.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/devgpu-features.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/docker-mark-blue.svg +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/feedback.png +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/gpu-fleet.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/icons8-cursor-ai.svg +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/index.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/k8s-under-the-hood.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/multinode.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/osdc-future-plans.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/problem.png +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/sandbox.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/sdk-demo.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/thesis.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/title-vid.mp4 +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/weneedgpus.png +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/presentation/wow.html +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/README.md +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/examples/batch_multi_gpu.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/examples/interactive_debug.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/examples/parallel_experiments.ipynb +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/examples/quickstart.ipynb +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/examples/run_tests.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/examples/submit_job.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/_async/__init__.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/_backend/__init__.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/_backend/aws.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/_backend/protocol.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/_sync/__init__.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/_sync/client.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/_sync/sandbox.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/_transport/__init__.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/_transport/ssh.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/common/__init__.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/common/config.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/common/enums.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/common/errors.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/common/models.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/src/gpu_dev/py.typed +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/tests/__init__.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/sdk/python/tests/test_models.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/setup.cfg +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-deck/backend.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-deck/main.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-deck/terraform.tfvars.example +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/.claude/skills/deploy.md +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/.terraform.lock.hcl +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/README.md +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/alb.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/ami-baker.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/availability.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/backend.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/build-node.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/check_b200.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/cluster-autoscaler.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/cmd_proxy.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/.dockerignore +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/Dockerfile +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/backup-dotfiles +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/bash_profile +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/bashrc +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/bashrc_ext +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/build-with-efa.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/dotfiles-shutdown-handler +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/list-dotfile-versions +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/motd_script +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/nproc_wrapper +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/profile +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/restore-dotfiles +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/restore-dotfiles-version +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/setup-dotfiles-persistence +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/shell_env +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/ssh_config +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/zprofile +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/zshrc +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker/zshrc_ext +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker-build.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker-example/Dockerfile +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/docker-example/hello.txt +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/ecr.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/efs.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/eks.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/expiry.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/git-cache.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/gpu-dev-pod-irsa.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/kubernetes.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/availability_updater/index.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/availability_updater/requirements.txt +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/reservation_expiry/index.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/reservation_processor/buildkit_job.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/reservation_processor/index.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/reservation_processor/requirements.txt +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/shared/__init__.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/shared/alb_utils.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/shared/dns_utils.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/shared/k8s_client.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/shared/requirements.txt +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/shared/snapshot_utils.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/list_b200.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/main.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/mig-config.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/mig-parted-config.yaml +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py.bak +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/migrations/check_snapshots.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/migrations/run_backfill.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/monitoring.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/node-termination-handler.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/outputs.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/pyproject.toml +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/pytorch-prebuild.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/queue.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/route53.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/s3-disk-contents.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/scripts/CLEANUP_GUIDE.md +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/scripts/detect_empty_volumes.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/scripts/ec2_avail_probe.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/scripts/inspect_user_data.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/ssh-proxy/Dockerfile +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/ssh-proxy/proxy.py +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/ssh-proxy/requirements.txt +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/ssh-proxy-service.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/ssh-proxy.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/subnet-0fe3a2c45570091ad +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/switch-to.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/templates/al2023-cpu-user-data.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/templates/al2023-user-data.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/templates/ami-baker-user-data.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/templates/user-data-self-managed.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/templates/user-data.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/variables.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/warm-pool.tf +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/tests/submit/README.md +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/tests/submit/fail/run.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/tests/submit/multinode/run.sh +0 -0
- {gpu_dev-0.7.2 → gpu_dev-0.7.4}/tests/submit/success/run.sh +0 -0
|
@@ -89,20 +89,6 @@ def select_gpu_type_interactive(
|
|
|
89
89
|
if "-mig-" not in gt and not (_hide_spot and _is_spot_type(gt))
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
# Aggregate MIG slice availability per parent type, hinted on the h100/b200 rows.
|
|
93
|
-
def _mig_aggregates(parent: str):
|
|
94
|
-
avail = sum(
|
|
95
|
-
int(info.get("available", 0))
|
|
96
|
-
for gt, info in (availability_info or {}).items()
|
|
97
|
-
if gt.startswith(f"{parent}-mig-")
|
|
98
|
-
)
|
|
99
|
-
cap = sum(
|
|
100
|
-
int(info.get("total", 0))
|
|
101
|
-
for gt, info in (availability_info or {}).items()
|
|
102
|
-
if gt.startswith(f"{parent}-mig-")
|
|
103
|
-
)
|
|
104
|
-
return avail, cap
|
|
105
|
-
|
|
106
92
|
# Detect spot types and fetch cross-region spot availability
|
|
107
93
|
from .config import Config, load_config
|
|
108
94
|
_cfg = load_config()
|
|
@@ -142,16 +128,11 @@ def select_gpu_type_interactive(
|
|
|
142
128
|
except Exception as e:
|
|
143
129
|
pass # east1 not accessible — show without spot
|
|
144
130
|
|
|
145
|
-
#
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
for gt, info in visible_info.items():
|
|
149
|
-
if "mig" in gt:
|
|
150
|
-
mig_gpus[gt] = info
|
|
151
|
-
else:
|
|
152
|
-
full_gpus[gt] = info
|
|
131
|
+
# visible_info already excludes -mig- SKUs and (when hidden) spot, so these are
|
|
132
|
+
# all "full" rows; MIG slices render as a sub-row under their parent.
|
|
133
|
+
full_gpus = dict(visible_info)
|
|
153
134
|
|
|
154
|
-
# Spot types from cross-region (prod-east1)
|
|
135
|
+
# Spot types from cross-region (prod-east1).
|
|
155
136
|
spot_gpus = {k: v for k, v in spot_region_info.items() if k in _spot_types}
|
|
156
137
|
|
|
157
138
|
def _format_wait(available, est_wait):
|
|
@@ -166,11 +147,6 @@ def select_gpu_type_interactive(
|
|
|
166
147
|
return f"{h}h{f' {m}min' if m else ''}", "⏳"
|
|
167
148
|
return "Unknown", "⚠️"
|
|
168
149
|
|
|
169
|
-
def _format_avail(available, is_maintenance, maintenance_reason):
|
|
170
|
-
if is_maintenance:
|
|
171
|
-
return f"[red]MAINTENANCE[/red]"
|
|
172
|
-
return f"[green]{available}[/green]" if available > 0 else f"[red]{available}[/red]"
|
|
173
|
-
|
|
174
150
|
def _mig_breakdown(parent):
|
|
175
151
|
"""Compact per-slice availability for a parent, e.g. (['12×1G','4×2G'], 16, 32)."""
|
|
176
152
|
parts, tot_a, tot_c = [], 0, 0
|
|
@@ -183,145 +159,108 @@ def select_gpu_type_interactive(
|
|
|
183
159
|
parts.append(f"{a}×{cgt.rsplit('-', 1)[-1].upper()}")
|
|
184
160
|
return parts, tot_a, tot_c
|
|
185
161
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
162
|
+
# ── The selectable list IS the table (boxed + colored) ────────────────────
|
|
163
|
+
# Box-drawing borders are non-selectable Separators; each GPU is a Choice whose
|
|
164
|
+
# title is FormattedText so cells are individually colored. questionary indents
|
|
165
|
+
# Separators and Choices identically, so the borders line up with the rows.
|
|
166
|
+
# Arrow keys move through the table; the » pointer marks the row, Enter picks it.
|
|
167
|
+
# Emoji are kept OUT of cells (double-width → would ragged the right border);
|
|
168
|
+
# status is conveyed with color instead.
|
|
169
|
+
G, R, Y, BL, CY, MG, DIM = (
|
|
170
|
+
"fg:ansigreen", "fg:ansired", "fg:ansiyellow",
|
|
171
|
+
"fg:ansiblue", "fg:ansicyan bold", "fg:ansimagenta", "fg:#808080")
|
|
172
|
+
|
|
173
|
+
def _status(info):
|
|
174
|
+
avail = int(info.get("available", 0))
|
|
175
|
+
est = info.get("estimated_wait_minutes", 0)
|
|
176
|
+
ql = int(info.get("queue_length", 0))
|
|
177
|
+
if avail > 0:
|
|
178
|
+
text, style = "available now", G
|
|
179
|
+
elif est:
|
|
180
|
+
text, style = _format_wait(avail, est)[0], Y
|
|
181
|
+
else:
|
|
182
|
+
text, style = "queued", Y
|
|
183
|
+
if ql > 0:
|
|
184
|
+
text += f" · {ql} queued"
|
|
185
|
+
return text, style
|
|
186
|
+
|
|
187
|
+
# rows: (cells[type, avail, maxres, total, status], styles|None, value, kind)
|
|
188
|
+
rows = []
|
|
189
|
+
for gt, info in full_gpus.items():
|
|
190
|
+
if info.get("maintenance", False):
|
|
191
|
+
reason = (info.get("maintenance_reason", "") or "maintenance")[:18]
|
|
192
|
+
rows.append(([gt.upper(), "-", "-", str(int(info.get("total", 0))),
|
|
193
|
+
f"MAINT: {reason}"], None, gt, "maint"))
|
|
194
|
+
continue
|
|
195
|
+
a = int(info.get("available", 0))
|
|
196
|
+
st_text, st_style = _status(info)
|
|
197
|
+
cells = [gt.upper(), str(a), str(int(info.get("max_reservable", 0))),
|
|
198
|
+
str(int(info.get("total", 0))), st_text]
|
|
199
|
+
styles = [CY, G if a > 0 else R, G, BL, st_style]
|
|
200
|
+
rows.append((cells, styles, gt, "gpu"))
|
|
201
|
+
parts, mig_a, mig_c = _mig_breakdown(gt)
|
|
202
|
+
if parts:
|
|
203
|
+
rows.append(([" └─ MIG", str(mig_a), "-", str(mig_c), " ".join(parts)],
|
|
204
|
+
None, None, "mig"))
|
|
205
|
+
|
|
206
|
+
spot_rows = []
|
|
223
207
|
if spot_gpus:
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
st = Table()
|
|
227
|
-
st.add_column("GPU Type", style="cyan")
|
|
228
|
-
st.add_column("Avail\nNow", style="green")
|
|
229
|
-
st.add_column("Per\nNode", style="bright_green")
|
|
230
|
-
st.add_column("Status", style="magenta")
|
|
231
|
-
st.add_column("Spot Discount", style="dim")
|
|
232
|
-
_on_demand = {"b300": 95, "b200": 95, "h200": 55, "h100": 98, "a100": 32, "t4": 4.5, "l4": 7}
|
|
208
|
+
_pn = {"b300": 8, "b200": 8, "h200": 8, "h100": 8, "a100": 8, "t4": 4, "l4": 4}
|
|
209
|
+
_od = {"b300": 95, "b200": 95, "h200": 55, "h100": 98, "a100": 32, "t4": 4.5, "l4": 7}
|
|
233
210
|
for gt, info in spot_gpus.items():
|
|
234
|
-
avail = info.get("available", 0)
|
|
235
|
-
pn = spot_per_node.get(gt, 8)
|
|
236
|
-
ad = f"[green]{avail}[/green]" if avail > 0 else "[dim]0[/dim]"
|
|
237
|
-
status = "[green]Node up[/green]" if avail > 0 else "Spins up on reserve (~10 min)"
|
|
211
|
+
avail = int(info.get("available", 0))
|
|
238
212
|
si = info.get("spot_info", {}) or {}
|
|
239
|
-
# Availability signal from spot price vs on-demand
|
|
240
213
|
sp = si.get("spot_price", "") if isinstance(si, dict) else ""
|
|
241
|
-
if not sp or
|
|
242
|
-
|
|
214
|
+
if not sp or "No spot data" in str(si.get("spot_signal", "")):
|
|
215
|
+
if avail <= 0:
|
|
216
|
+
continue
|
|
217
|
+
disc = "available now"
|
|
243
218
|
else:
|
|
244
219
|
try:
|
|
245
|
-
|
|
246
|
-
pct = int((1 - ratio) * 100)
|
|
247
|
-
if ratio < 0.4:
|
|
248
|
-
avail_signal = f"[green]High ({pct}% off)[/green]"
|
|
249
|
-
elif ratio < 0.7:
|
|
250
|
-
avail_signal = f"[yellow]Medium ({pct}% off)[/yellow]"
|
|
251
|
-
else:
|
|
252
|
-
avail_signal = f"[red]Low ({pct}% off)[/red]"
|
|
220
|
+
disc = f"~{int((1 - float(sp) / _od.get(gt, 50)) * 100)}% off"
|
|
253
221
|
except (ValueError, TypeError):
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
222
|
+
disc = "spot n/a"
|
|
223
|
+
st_text = ("node up · " if avail > 0 else "spins up · ") + disc
|
|
224
|
+
cells = [f"{gt.upper()} *", str(avail), f"{_pn.get(gt, 8)}/node", "-", st_text]
|
|
225
|
+
styles = [MG, G if avail > 0 else DIM, G, DIM, G if avail > 0 else Y]
|
|
226
|
+
spot_rows.append((cells, styles, f"spot:{gt}", "spot"))
|
|
259
227
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
228
|
+
headers = ["GPU Type", "Avail", "MaxRes", "Total", "Status"]
|
|
229
|
+
all_cells = [headers] + [r[0] for r in rows] + [s[0] for s in spot_rows]
|
|
230
|
+
W = [max(len(str(rc[i])) for rc in all_cells) for i in range(5)]
|
|
231
|
+
|
|
232
|
+
def _bar(left, mid, right):
|
|
233
|
+
return left + mid.join("─" * (w + 2) for w in W) + right
|
|
234
|
+
|
|
235
|
+
def _line(cells): # plain string row (header / mig / maint), inside the box
|
|
236
|
+
return "│" + "│".join(f" {str(c):<{W[i]}} " for i, c in enumerate(cells)) + "│"
|
|
237
|
+
|
|
238
|
+
def _ft(cells, styles): # colored row -> FormattedText for a Choice
|
|
239
|
+
toks = [("class:separator", "│")]
|
|
240
|
+
for i, c in enumerate(cells):
|
|
241
|
+
toks.append((styles[i], f" {str(c):<{W[i]}} "))
|
|
242
|
+
toks.append(("class:separator", "│"))
|
|
243
|
+
return toks
|
|
244
|
+
|
|
245
|
+
console.print()
|
|
246
|
+
choices = [questionary.Separator(_bar("┌", "┬", "┐")),
|
|
247
|
+
questionary.Separator(_line(headers)),
|
|
248
|
+
questionary.Separator(_bar("├", "┼", "┤"))]
|
|
249
|
+
if not rows:
|
|
250
|
+
choices.append(questionary.Separator(_line(["(none)", "", "", "", ""])))
|
|
251
|
+
for cells, styles, value, kind in rows:
|
|
252
|
+
if kind in ("mig", "maint"):
|
|
253
|
+
choices.append(questionary.Separator(_line(cells)))
|
|
274
254
|
else:
|
|
275
|
-
|
|
276
|
-
if ql > 0:
|
|
277
|
-
label += f" - {ql} in queue"
|
|
278
|
-
# Any parent with MIG children gets a slice hint (h100/b200 today).
|
|
279
|
-
mig_avail, mig_cap = _mig_aggregates(gt)
|
|
280
|
-
if mig_cap > 0:
|
|
281
|
-
label += f" — also {mig_avail}/{mig_cap} MIG slices"
|
|
282
|
-
choices.append(questionary.Choice(title=label, value=gt))
|
|
283
|
-
|
|
284
|
-
if mig_gpus:
|
|
285
|
-
choices.append(questionary.Separator("═══ 🔬 MIG Slices (fractional GPUs) ═══"))
|
|
286
|
-
for gt, info in mig_gpus.items():
|
|
287
|
-
avail = info.get("available", 0)
|
|
288
|
-
total = info.get("total", 0)
|
|
289
|
-
_, si = _format_wait(avail, info.get("estimated_wait_minutes", 0))
|
|
290
|
-
choices.append(questionary.Choice(
|
|
291
|
-
title=f"{si} {gt.upper()} ({avail}/{total} available)", value=gt))
|
|
255
|
+
choices.append(questionary.Choice(title=_ft(cells, styles), value=value))
|
|
292
256
|
|
|
293
|
-
if
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
for gt, info in spot_gpus.items():
|
|
298
|
-
avail = info.get("available", 0)
|
|
299
|
-
pn = _spot_per_node.get(gt, 8)
|
|
300
|
-
si_data = info.get("spot_info", {}) or {}
|
|
301
|
-
sp = si_data.get("spot_price", "") if isinstance(si_data, dict) else ""
|
|
302
|
-
# Derive availability signal
|
|
303
|
-
avail_now = int(info.get("available", 0))
|
|
304
|
-
if not sp or "No spot data" in str(si_data.get("spot_signal", "")):
|
|
305
|
-
if avail_now > 0:
|
|
306
|
-
signal = f"🟢 {avail_now} available now"
|
|
307
|
-
else:
|
|
308
|
-
continue
|
|
309
|
-
else:
|
|
310
|
-
try:
|
|
311
|
-
ratio = float(sp) / _on_demand.get(gt, 50)
|
|
312
|
-
pct = int((1 - ratio) * 100)
|
|
313
|
-
if ratio < 0.4: signal = f"🟢 High avail ({pct}% off)"
|
|
314
|
-
elif ratio < 0.7: signal = f"🟡 Medium ({pct}% off)"
|
|
315
|
-
else: signal = f"🔴 Low ({pct}% off)"
|
|
316
|
-
except (ValueError, TypeError):
|
|
317
|
-
signal = "availability unknown"
|
|
318
|
-
if avail > 0:
|
|
319
|
-
label = f"✅ {gt.upper()} * ({avail} free, {pn}/node, {signal})"
|
|
320
|
-
else:
|
|
321
|
-
label = f"⚡ {gt.upper()} * ({pn} GPUs/node, {signal})"
|
|
322
|
-
choices.append(questionary.Choice(title=label, value=f"spot:{gt}"))
|
|
257
|
+
if spot_rows:
|
|
258
|
+
choices.append(questionary.Separator(_bar("├", "┼", "┤")))
|
|
259
|
+
for cells, styles, value, _k in spot_rows:
|
|
260
|
+
choices.append(questionary.Choice(title=_ft(cells, styles), value=value))
|
|
323
261
|
|
|
324
|
-
choices.append(questionary.Separator("
|
|
262
|
+
choices.append(questionary.Separator(_bar("└", "┴", "┘")))
|
|
263
|
+
choices.append(questionary.Separator(" "))
|
|
325
264
|
if _hide_spot:
|
|
326
265
|
choices.append(questionary.Choice(
|
|
327
266
|
title="⚡ Show spot options (us-east-1, ~70% cheaper, may be preempted)",
|
|
@@ -334,7 +273,8 @@ def select_gpu_type_interactive(
|
|
|
334
273
|
while True:
|
|
335
274
|
try:
|
|
336
275
|
answer = questionary.select(
|
|
337
|
-
"Select GPU type:",
|
|
276
|
+
"Select GPU type — ↑/↓ then Enter (MIG: pick its parent GPU):",
|
|
277
|
+
choices=choices, style=custom_style
|
|
338
278
|
).ask()
|
|
339
279
|
|
|
340
280
|
if answer == "_refresh":
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "gpu-dev"
|
|
7
|
-
version = "0.7.2"
|
|
7
|
+
version = "0.7.4"
|
|
8
8
|
description = "CLI + Python SDK for PyTorch GPU developer server reservations"
|
|
9
9
|
authors = [{name = "PyTorch Team"}]
|
|
10
10
|
readme = "cli-tools/gpu-dev-cli/README.md"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/availability_updater/index.py
RENAMED
|
File without changes
|
|
File without changes
|
{gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py
RENAMED
|
File without changes
|
|
File without changes
|
{gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt
RENAMED
|
File without changes
|
|
File without changes
|
{gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/reservation_processor/index.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.7.2 → gpu_dev-0.7.4}/terraform-gpu-devservers/templates/user-data-self-managed.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|