gpu-dev 0.7.1__tar.gz → 0.7.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/CLAUDE.md +1 -1
  2. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/PKG-INFO +1 -1
  3. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/cli.py +43 -12
  4. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/interactive.py +115 -165
  5. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/gpu_dev.egg-info/PKG-INFO +1 -1
  6. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/pyproject.toml +1 -1
  7. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/__init__.py +1 -1
  8. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/.github/workflows/no-gitlinks.yml +0 -0
  9. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/.github/workflows/publish.yml +0 -0
  10. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/.gitignore +0 -0
  11. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/README.md +0 -0
  12. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/admin/README.md +0 -0
  13. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/admin/generate_stats.py +0 -0
  14. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/admin/requirements.txt +0 -0
  15. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/gpu-dev-cli/README.md +0 -0
  16. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/gpu-dev-cli/ZERO_CONFIG_SETUP.md +0 -0
  17. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/__init__.py +0 -0
  18. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/auth.py +0 -0
  19. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/config.py +0 -0
  20. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/disks.py +0 -0
  21. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/name_generator.py +0 -0
  22. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/reservations.py +0 -0
  23. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/ssh_proxy.py +0 -0
  24. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/gpu-dev-cli/minimal-iam-policy.json +0 -0
  25. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/cli-tools/scripts/clear_stale_disk_locks.py +0 -0
  26. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/docs/SDK_REPRO.md +0 -0
  27. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/docs/USER_GUIDE.md +0 -0
  28. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/docs/devgpu-features.html +0 -0
  29. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/docs/docker-mark-blue.svg +0 -0
  30. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/docs/icons8-cursor-ai.svg +0 -0
  31. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/gpu_dev.egg-info/SOURCES.txt +0 -0
  32. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/gpu_dev.egg-info/dependency_links.txt +0 -0
  33. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/gpu_dev.egg-info/entry_points.txt +0 -0
  34. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/gpu_dev.egg-info/requires.txt +0 -0
  35. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/gpu_dev.egg-info/top_level.txt +0 -0
  36. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/architecture.html +0 -0
  37. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/cli-demo.html +0 -0
  38. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/devgpu-features.html +0 -0
  39. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/docker-mark-blue.svg +0 -0
  40. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/feedback.png +0 -0
  41. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/gpu-fleet.html +0 -0
  42. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/icons8-cursor-ai.svg +0 -0
  43. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/index.html +0 -0
  44. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/k8s-under-the-hood.html +0 -0
  45. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/multinode.html +0 -0
  46. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/osdc-future-plans.html +0 -0
  47. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/problem.png +0 -0
  48. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/sandbox.html +0 -0
  49. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/sdk-demo.html +0 -0
  50. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/thesis.html +0 -0
  51. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/title-vid.mp4 +0 -0
  52. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/weneedgpus.png +0 -0
  53. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/presentation/wow.html +0 -0
  54. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/README.md +0 -0
  55. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/examples/batch_multi_gpu.py +0 -0
  56. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/examples/interactive_debug.py +0 -0
  57. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/examples/parallel_experiments.ipynb +0 -0
  58. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/examples/quickstart.ipynb +0 -0
  59. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/examples/run_tests.py +0 -0
  60. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/examples/submit_job.py +0 -0
  61. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/_async/__init__.py +0 -0
  62. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/_backend/__init__.py +0 -0
  63. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/_backend/aws.py +0 -0
  64. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/_backend/protocol.py +0 -0
  65. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/_sync/__init__.py +0 -0
  66. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/_sync/client.py +0 -0
  67. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/_sync/sandbox.py +0 -0
  68. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/_transport/__init__.py +0 -0
  69. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/_transport/ssh.py +0 -0
  70. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/common/__init__.py +0 -0
  71. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/common/config.py +0 -0
  72. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/common/enums.py +0 -0
  73. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/common/errors.py +0 -0
  74. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/common/models.py +0 -0
  75. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/src/gpu_dev/py.typed +0 -0
  76. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/tests/__init__.py +0 -0
  77. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/sdk/python/tests/test_models.py +0 -0
  78. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/setup.cfg +0 -0
  79. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-deck/backend.tf +0 -0
  80. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-deck/main.tf +0 -0
  81. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-deck/terraform.tfvars.example +0 -0
  82. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/.claude/skills/deploy.md +0 -0
  83. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/.terraform.lock.hcl +0 -0
  84. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/README.md +0 -0
  85. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/alb.tf +0 -0
  86. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/ami-baker.tf +0 -0
  87. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/availability.tf +0 -0
  88. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/backend.tf +0 -0
  89. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/build-node.tf +0 -0
  90. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/check_b200.py +0 -0
  91. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/cluster-autoscaler.tf +0 -0
  92. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/cmd_proxy.py +0 -0
  93. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/.dockerignore +0 -0
  94. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/Dockerfile +0 -0
  95. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/backup-dotfiles +0 -0
  96. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/bash_profile +0 -0
  97. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/bashrc +0 -0
  98. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/bashrc_ext +0 -0
  99. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/build-with-efa.sh +0 -0
  100. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/dotfiles-shutdown-handler +0 -0
  101. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/list-dotfile-versions +0 -0
  102. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/motd_script +0 -0
  103. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/nproc_wrapper +0 -0
  104. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/profile +0 -0
  105. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/restore-dotfiles +0 -0
  106. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/restore-dotfiles-version +0 -0
  107. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/setup-dotfiles-persistence +0 -0
  108. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/shell_env +0 -0
  109. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/ssh_config +0 -0
  110. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/zprofile +0 -0
  111. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/zshrc +0 -0
  112. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker/zshrc_ext +0 -0
  113. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker-build.tf +0 -0
  114. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker-example/Dockerfile +0 -0
  115. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/docker-example/hello.txt +0 -0
  116. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/ecr.tf +0 -0
  117. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/efs.tf +0 -0
  118. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/eks.tf +0 -0
  119. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/expiry.tf +0 -0
  120. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/git-cache.tf +0 -0
  121. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/gpu-dev-pod-irsa.tf +0 -0
  122. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/kubernetes.tf +0 -0
  123. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/availability_updater/index.py +0 -0
  124. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/availability_updater/requirements.txt +0 -0
  125. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py +0 -0
  126. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/reservation_expiry/index.py +0 -0
  127. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt +0 -0
  128. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/reservation_processor/buildkit_job.py +0 -0
  129. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/reservation_processor/index.py +0 -0
  130. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/reservation_processor/requirements.txt +0 -0
  131. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/shared/__init__.py +0 -0
  132. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/shared/alb_utils.py +0 -0
  133. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/shared/dns_utils.py +0 -0
  134. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/shared/k8s_client.py +0 -0
  135. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py +0 -0
  136. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/shared/requirements.txt +0 -0
  137. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda/shared/snapshot_utils.py +0 -0
  138. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/lambda.tf +0 -0
  139. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/list_b200.py +0 -0
  140. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/main.tf +0 -0
  141. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/mig-config.tf +0 -0
  142. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/mig-parted-config.yaml +0 -0
  143. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py +0 -0
  144. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py.bak +0 -0
  145. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/migrations/check_snapshots.py +0 -0
  146. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py +0 -0
  147. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/migrations/run_backfill.sh +0 -0
  148. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/monitoring.tf +0 -0
  149. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/node-termination-handler.tf +0 -0
  150. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/outputs.tf +0 -0
  151. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/pyproject.toml +0 -0
  152. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/pytorch-prebuild.tf +0 -0
  153. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/queue.tf +0 -0
  154. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/route53.tf +0 -0
  155. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/s3-disk-contents.tf +0 -0
  156. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/scripts/CLEANUP_GUIDE.md +0 -0
  157. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/scripts/detect_empty_volumes.sh +0 -0
  158. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/scripts/ec2_avail_probe.sh +0 -0
  159. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/scripts/inspect_user_data.sh +0 -0
  160. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/ssh-proxy/Dockerfile +0 -0
  161. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/ssh-proxy/proxy.py +0 -0
  162. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/ssh-proxy/requirements.txt +0 -0
  163. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/ssh-proxy-service.tf +0 -0
  164. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/ssh-proxy.tf +0 -0
  165. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/subnet-0fe3a2c45570091ad +0 -0
  166. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/switch-to.sh +0 -0
  167. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/templates/al2023-cpu-user-data.sh +0 -0
  168. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/templates/al2023-user-data.sh +0 -0
  169. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/templates/ami-baker-user-data.sh +0 -0
  170. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/templates/user-data-self-managed.sh +0 -0
  171. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/templates/user-data.sh +0 -0
  172. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/variables.tf +0 -0
  173. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/terraform-gpu-devservers/warm-pool.tf +0 -0
  174. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/tests/submit/README.md +0 -0
  175. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/tests/submit/fail/run.sh +0 -0
  176. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/tests/submit/multinode/run.sh +0 -0
  177. {gpu_dev-0.7.1 → gpu_dev-0.7.3}/tests/submit/success/run.sh +0 -0
@@ -318,7 +318,7 @@ module "us_east_1" {
318
318
 
319
319
  - **Merge multi-region into single tf state** - HIGH PRIORITY. Kill prod-east1 workspace, refactor into module-per-region in one state. See research notes below. Enables: one `tf apply`, shared AMI (aws_ami_copy), shared Docker (ECR replication already set up), no double builds. Prerequisite for adding west regions.
320
320
  - **Add us-west-1 and us-west-2 spot regions** - BLOCKED on single-state refactor. After refactor, adding a region = adding one module block.
321
- - **Spot UX improvements** - Queue position should be #1 for each type (not cross-type FIFO). Status should show "queued (waiting for capacity)" not just "queued". Interactive picker should show spot GPU counts from east1 not prod.
321
+ - **Spot UX improvements** - Queue position should be #1 for each type (not cross-type FIFO). Status should show "queued (waiting for capacity)" not just "queued". Interactive picker should show spot GPU counts from east1 not prod. NOTE (2026-05-30): spot is now **hidden by default** in `gpu-dev reserve` (interactive picker), `gpu-dev avail`, and watch mode — `cpu-spot` + the us-east-1 spot cluster only appear with `--spot` (reserve/avail flag) or the "⚡ Show spot options" picker entry. Spot was too bloated/half-baked for the default view. CLI-only change (`cli.py` `_show_availability`/`_show_availability_watch`/`avail`/`reserve`, `interactive.py` `select_gpu_type_interactive`).
322
322
  - **FQDN for devservers** - Set up proper domain names for development server access
323
323
  - **Automated SSH config per reservation** - ✅ DONE - Each reservation now gets `~/.devgpu/<reservation_id>-sshconfig` file, use with `ssh -F ~/.devgpu/<reservation_id>-sshconfig <pod_name>`
324
324
  - **Custom Docker image scaffold** - Create Dockerfile with pre-installed packages (Jupyter, etc.)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpu-dev
3
- Version: 0.7.1
3
+ Version: 0.7.3
4
4
  Summary: CLI + Python SDK for PyTorch GPU developer server reservations
5
5
  Author: PyTorch Team
6
6
  Requires-Python: >=3.10
@@ -829,9 +829,9 @@ def reserve(
829
829
  rprint("[red]❌ Could not get GPU availability information[/red]")
830
830
  return
831
831
 
832
- # Interactive GPU type selection
832
+ # Interactive GPU type selection (spot hidden unless --spot)
833
833
  if gpu_type is None:
834
- gpu_type = select_gpu_type_interactive(availability_info)
834
+ gpu_type = select_gpu_type_interactive(availability_info, show_spot=spot)
835
835
  if gpu_type is None:
836
836
  rprint("[yellow]Reservation cancelled.[/yellow]")
837
837
  return
@@ -3163,8 +3163,11 @@ def _format_gpu_display(gpu_count, gpu_type):
3163
3163
  return f"{gpu_count}x {str(gpu_type).upper()}"
3164
3164
 
3165
3165
 
3166
- def _show_availability() -> None:
3167
- """Shared function to show GPU availability"""
3166
+ def _show_availability(show_spot: bool = False) -> None:
3167
+ """Shared function to show GPU availability.
3168
+
3169
+ Spot SKUs (cpu-spot + the us-east-1 spot cluster) are hidden unless show_spot.
3170
+ """
3168
3171
  try:
3169
3172
  with Live(
3170
3173
  Spinner("dots", text="📡 Checking GPU availability..."), console=console
@@ -3181,7 +3184,7 @@ def _show_availability() -> None:
3181
3184
  _east1_spot_types = frozenset(Config.ENVIRONMENTS.get("prod-east1", {}).get("spot_types", []))
3182
3185
 
3183
3186
  def _fetch_east1_spot():
3184
- if _env_name != "prod" or not _east1_spot_types:
3187
+ if not show_spot or _env_name != "prod" or not _east1_spot_types:
3185
3188
  return {}
3186
3189
  east1_r = Config.ENVIRONMENTS["prod-east1"]["region"]
3187
3190
  east1_table = config.session.resource("dynamodb", region_name=east1_r).Table("pytorch-gpu-dev-gpu-availability")
@@ -3247,8 +3250,16 @@ def _show_availability() -> None:
3247
3250
  "CPU (arm64)": 6,
3248
3251
  }
3249
3252
 
3250
- # Split into categories
3251
- full_types = {k: v for k, v in availability_info.items() if "mig" not in k}
3253
+ # Split into categories. Hide spot SKUs (e.g. cpu-spot) unless --spot,
3254
+ # but never hide everything if the env is spot-only.
3255
+ def _is_spot(k):
3256
+ return k == "cpu-spot" or k.endswith("-spot")
3257
+ _non_spot_exists = any(not _is_spot(k) for k in availability_info if "mig" not in k)
3258
+ _hide_spot = (not show_spot) and _non_spot_exists
3259
+ full_types = {
3260
+ k: v for k, v in availability_info.items()
3261
+ if "mig" not in k and not (_hide_spot and _is_spot(k))
3262
+ }
3252
3263
  mig_types = {k: v for k, v in availability_info.items() if "mig" in k}
3253
3264
 
3254
3265
  def _sort_by_arch(items):
@@ -3344,8 +3355,12 @@ def _show_availability() -> None:
3344
3355
  rprint(" [green]●[/green]: 1+ full node available - [yellow]●[/yellow]: GPUs available, but no full node - [red]●[/red]: No GPUs available")
3345
3356
 
3346
3357
  # Show usage tip
3358
+ if _hide_spot:
3359
+ rprint(
3360
+ "\n[dim]💡 Spot instances hidden — pass '--spot' to show (us-east-1, ~70% cheaper, may be preempted)[/dim]"
3361
+ )
3347
3362
  rprint(
3348
- "\n[dim]💡 Use 'gpu-dev reserve' (interactive) to see all options including MIG slices and spot instances[/dim]"
3363
+ "\n[dim]💡 Use 'gpu-dev reserve' (interactive) to see all options including MIG slices[/dim]"
3349
3364
  )
3350
3365
 
3351
3366
  else:
@@ -3355,10 +3370,13 @@ def _show_availability() -> None:
3355
3370
  rprint(f"[red]❌ Error: {str(e)}[/red]")
3356
3371
 
3357
3372
 
3358
- def _show_availability_watch(interval: int) -> None:
3373
+ def _show_availability_watch(interval: int, show_spot: bool = False) -> None:
3359
3374
  _env_name = load_config().user_config.get("environment", "prod")
3360
3375
  _spot_types = frozenset(Config.ENVIRONMENTS.get(_env_name, {}).get("spot_types", []))
3361
3376
 
3377
+ def _is_spot(k):
3378
+ return k == "cpu-spot" or k.endswith("-spot")
3379
+
3362
3380
  """Watch mode for GPU availability with auto-refresh"""
3363
3381
  import time
3364
3382
  from datetime import datetime
@@ -3385,6 +3403,13 @@ def _show_availability_watch(interval: int) -> None:
3385
3403
  # Get availability data
3386
3404
  availability_info = reservation_mgr.get_gpu_availability_by_type()
3387
3405
 
3406
+ # Hide spot SKUs (e.g. cpu-spot) unless --spot, never hide everything.
3407
+ if availability_info and not show_spot:
3408
+ if any(not _is_spot(k) for k in availability_info if "mig" not in k):
3409
+ availability_info = {
3410
+ k: v for k, v in availability_info.items() if not _is_spot(k)
3411
+ }
3412
+
3388
3413
  if availability_info:
3389
3414
  # GPU architecture mapping (for display)
3390
3415
  gpu_architectures = {
@@ -4024,8 +4049,14 @@ def help(ctx: click.Context) -> None:
4024
4049
  default=5,
4025
4050
  help="Refresh interval in seconds for watch mode (default: 5)",
4026
4051
  )
4052
+ @click.option(
4053
+ "--spot",
4054
+ is_flag=True,
4055
+ default=False,
4056
+ help="Also show spot instances (us-east-1, ~70% cheaper, may be preempted). Hidden by default.",
4057
+ )
4027
4058
  @click.pass_context
4028
- def avail(ctx: click.Context, watch: bool, interval: int) -> None:
4059
+ def avail(ctx: click.Context, watch: bool, interval: int, spot: bool) -> None:
4029
4060
  """Show GPU availability by type and queue estimates
4030
4061
 
4031
4062
  Displays real-time information about GPU availability for each GPU type.
@@ -4045,9 +4076,9 @@ def avail(ctx: click.Context, watch: bool, interval: int) -> None:
4045
4076
  This helps you choose the right GPU type and understand wait times before reserving.
4046
4077
  """
4047
4078
  if watch:
4048
- _show_availability_watch(interval)
4079
+ _show_availability_watch(interval, show_spot=spot)
4049
4080
  else:
4050
- _show_availability()
4081
+ _show_availability(show_spot=spot)
4051
4082
 
4052
4083
 
4053
4084
  @main.command()
@@ -50,11 +50,22 @@ def check_interactive_support() -> bool:
50
50
  return True
51
51
 
52
52
 
53
+ def _is_spot_type(gt: str) -> bool:
54
+ """Spot SKUs hidden from default views: the cpu-spot type + any `*-spot` type."""
55
+ return gt == "cpu-spot" or gt.endswith("-spot")
56
+
57
+
53
58
  def select_gpu_type_interactive(
54
59
  availability_info: Dict[str, Dict[str, Any]],
55
60
  _refresh: bool = False,
61
+ show_spot: bool = False,
56
62
  ) -> Optional[str]:
57
- """Interactive GPU type selection with availability table"""
63
+ """Interactive GPU type selection with availability table.
64
+
65
+ Spot SKUs (cpu-spot + the cross-region us-east-1 spot cluster) are hidden by
66
+ default — pass show_spot=True (CLI `--spot`) or pick the "Show spot options"
67
+ entry to reveal them.
68
+ """
58
69
  if not check_interactive_support():
59
70
  return None
60
71
 
@@ -65,33 +76,19 @@ def select_gpu_type_interactive(
65
76
  _mgr = ReservationManager(_cfg)
66
77
  availability_info = _mgr.get_gpu_availability_by_type() or availability_info
67
78
 
79
+ # Don't hide spot when the whole environment is spot-only (nothing left to show).
80
+ _non_spot_exists = any(
81
+ not _is_spot_type(gt) for gt in availability_info if "-mig-" not in gt
82
+ )
83
+ _hide_spot = (not show_spot) and _non_spot_exists
84
+
68
85
  # Hide MIG slice SKUs from the top-level selector — reached via the h100 submenu.
69
86
  # Direct `--gpu-type h100-mig-1g` still works for non-interactive scripts.
70
87
  visible_info = {
71
88
  gt: info for gt, info in availability_info.items()
72
- if "-mig-" not in gt
89
+ if "-mig-" not in gt and not (_hide_spot and _is_spot_type(gt))
73
90
  }
74
91
 
75
- # Aggregate MIG slice availability per parent type, hinted on the h100/b200 rows.
76
- def _mig_aggregates(parent: str):
77
- avail = sum(
78
- int(info.get("available", 0))
79
- for gt, info in (availability_info or {}).items()
80
- if gt.startswith(f"{parent}-mig-")
81
- )
82
- cap = sum(
83
- int(info.get("total", 0))
84
- for gt, info in (availability_info or {}).items()
85
- if gt.startswith(f"{parent}-mig-")
86
- )
87
- return avail, cap
88
-
89
- h100_mig_avail, h100_mig_capacity = _mig_aggregates("h100")
90
- b200_mig_avail, b200_mig_capacity = _mig_aggregates("b200")
91
- # Backwards-compat aliases for the existing h100 row code below.
92
- mig_total_available = h100_mig_avail
93
- mig_total_capacity = h100_mig_capacity
94
-
95
92
  # Detect spot types and fetch cross-region spot availability
96
93
  from .config import Config, load_config
97
94
  _cfg = load_config()
@@ -102,9 +99,10 @@ def select_gpu_type_interactive(
102
99
  has_spot_types = len(_spot_types) > 0
103
100
 
104
101
  # Cross-region: if we're on prod, also fetch prod-east1 spot availability
102
+ # (skipped entirely when spot is hidden — saves a DynamoDB scan).
105
103
  spot_region_info = {}
106
104
  spot_region_name = None
107
- if _env_name == "prod":
105
+ if _env_name == "prod" and not _hide_spot:
108
106
  east1_env = Config.ENVIRONMENTS.get("prod-east1", {})
109
107
  if east1_env:
110
108
  spot_region_name = "prod-east1"
@@ -130,16 +128,11 @@ def select_gpu_type_interactive(
130
128
  except Exception as e:
131
129
  pass # east1 not accessible — show without spot
132
130
 
133
- # Categorize GPU types into 3 sections
134
- full_gpus = {}
135
- mig_gpus = {}
136
- for gt, info in visible_info.items():
137
- if "mig" in gt:
138
- mig_gpus[gt] = info
139
- else:
140
- full_gpus[gt] = info
131
+ # visible_info already excludes -mig- SKUs and (when hidden) spot, so these are
132
+ # all "full" rows; MIG slices render as a sub-row under their parent.
133
+ full_gpus = dict(visible_info)
141
134
 
142
- # Spot types from cross-region (prod-east1) — only non-MIG, non-CPU spot types
135
+ # Spot types from cross-region (prod-east1).
143
136
  spot_gpus = {k: v for k, v in spot_region_info.items() if k in _spot_types}
144
137
 
145
138
  def _format_wait(available, est_wait):
@@ -154,162 +147,119 @@ def select_gpu_type_interactive(
154
147
  return f"{h}h{f' {m}min' if m else ''}", "⏳"
155
148
  return "Unknown", "⚠️"
156
149
 
157
- def _format_avail(available, is_maintenance, maintenance_reason):
158
- if is_maintenance:
159
- return f"[red]MAINTENANCE[/red]"
160
- return f"[green]{available}[/green]" if available > 0 else f"[red]{available}[/red]"
161
-
162
- def _build_table(title, items, is_spot=False):
163
- console.print(f"\n[cyan]{title}[/cyan]")
164
- table = Table()
165
- table.add_column("GPU Type", style="cyan")
166
- table.add_column("Avail", style="green")
167
- table.add_column("Max\nReservable", style="bright_green")
168
- table.add_column("Total", style="blue")
169
- table.add_column("Est. Wait Time", style="magenta")
170
- for gt, info in items.items():
171
- avail = info.get("available", 0)
172
- maint = info.get("maintenance", False)
173
- maint_reason = info.get("maintenance_reason", "")
174
- wait_display, _ = _format_wait(avail, info.get("estimated_wait_minutes", 0))
175
- if maint:
176
- wait_display = maint_reason or "Under maintenance"
177
- label = f"{gt.upper()} *" if is_spot else gt.upper()
178
- table.add_row(
179
- label,
180
- _format_avail(avail, maint, maint_reason),
181
- "-" if maint else str(info.get("max_reservable", 0)),
182
- str(info.get("total", 0)),
183
- wait_display,
184
- )
185
- console.print(table)
186
-
187
- # Section 1: Full GPUs & CPUs
188
- _build_table("━━━ Full GPUs & CPUs ━━━", full_gpus)
189
-
190
- # Section 2: MIG Slices
191
- if mig_gpus:
192
- console.print("[dim] Sliced GPUs isolated fractions of a physical GPU. Perfect for smaller jobs[/dim]")
193
- console.print("[dim] that don\'t need full performance or VRAM.[/dim]")
194
- _build_table("━━━ 🔬 MIG Slices ━━━", mig_gpus)
195
-
196
- # Section 3: Spot Instances (cross-region) — custom table with per-node + price
150
+ def _mig_breakdown(parent):
151
+ """Compact per-slice availability for a parent, e.g. (['12×1G','4×2G'], 16, 32)."""
152
+ parts, tot_a, tot_c = [], 0, 0
153
+ for cgt, ci in sorted((availability_info or {}).items()):
154
+ if not cgt.startswith(f"{parent}-mig-"):
155
+ continue
156
+ a, c = int(ci.get("available", 0)), int(ci.get("total", 0))
157
+ tot_a += a
158
+ tot_c += c
159
+ parts.append(f"{a}×{cgt.rsplit('-', 1)[-1].upper()}")
160
+ return parts, tot_a, tot_c
161
+
162
+ # ── The selectable list IS the table ──────────────────────────────────────
163
+ # questionary indents Separators and Choices identically, so a Separator
164
+ # header + aligned column text line up with the selectable rows. Arrow keys
165
+ # move through the table; Enter picks the highlighted row. No separate print.
166
+ def _row_cells(gt, info, is_spot=False):
167
+ avail = int(info.get("available", 0))
168
+ wd, emoji = _format_wait(avail, info.get("estimated_wait_minutes", 0))
169
+ ql = int(info.get("queue_length", 0))
170
+ if ql > 0:
171
+ wd += f" · {ql} queued"
172
+ typ = f"{gt.upper()} *" if is_spot else gt.upper()
173
+ return [typ, str(avail), str(int(info.get("max_reservable", 0))),
174
+ str(int(info.get("total", 0)))], f"{emoji} {wd}"
175
+
176
+ # Rows: (cells[type, avail, maxres, total], status, value, kind).
177
+ data_rows = []
178
+ for gt, info in full_gpus.items():
179
+ if info.get("maintenance", False):
180
+ data_rows.append((
181
+ [gt.upper(), "-", "-", str(int(info.get("total", 0)))],
182
+ f"MAINTENANCE: {info.get('maintenance_reason', '')}", gt, "maint"))
183
+ continue
184
+ cells, status = _row_cells(gt, info)
185
+ data_rows.append((cells, status, gt, "gpu"))
186
+ parts, mig_a, mig_c = _mig_breakdown(gt)
187
+ if parts:
188
+ data_rows.append((
189
+ [" └─ MIG", str(mig_a), "-", str(mig_c)],
190
+ f"{' '.join(parts)} · pick {gt.upper()} ↑", None, "mig"))
191
+
192
+ spot_data = []
197
193
  if spot_gpus:
198
- spot_per_node = {"b300": 8, "b200": 8, "h200": 8, "h100": 8, "a100": 8, "t4": 4, "l4": 4}
199
- console.print(f"\n[cyan]━━━ Spot Instances (us-east-1, ~70% cheaper) ━━━[/cyan]")
200
- st = Table()
201
- st.add_column("GPU Type", style="cyan")
202
- st.add_column("Avail\nNow", style="green")
203
- st.add_column("Per\nNode", style="bright_green")
204
- st.add_column("Status", style="magenta")
205
- st.add_column("Spot Discount", style="dim")
206
- _on_demand = {"b300": 95, "b200": 95, "h200": 55, "h100": 98, "a100": 32, "t4": 4.5, "l4": 7}
194
+ _pn = {"b300": 8, "b200": 8, "h200": 8, "h100": 8, "a100": 8, "t4": 4, "l4": 4}
195
+ _od = {"b300": 95, "b200": 95, "h200": 55, "h100": 98, "a100": 32, "t4": 4.5, "l4": 7}
207
196
  for gt, info in spot_gpus.items():
208
- avail = info.get("available", 0)
209
- pn = spot_per_node.get(gt, 8)
210
- ad = f"[green]{avail}[/green]" if avail > 0 else "[dim]0[/dim]"
211
- status = "[green]Node up[/green]" if avail > 0 else "Spins up on reserve (~10 min)"
197
+ avail = int(info.get("available", 0))
212
198
  si = info.get("spot_info", {}) or {}
213
- # Availability signal from spot price vs on-demand
214
199
  sp = si.get("spot_price", "") if isinstance(si, dict) else ""
215
- if not sp or (isinstance(si, dict) and "No spot data" in str(si.get("spot_signal", ""))):
216
- avail_signal = "[green]Available[/green]" if avail > 0 else "[dim]No price data[/dim]"
200
+ if not sp or "No spot data" in str(si.get("spot_signal", "")):
201
+ if avail <= 0:
202
+ continue
203
+ disc = "available now"
217
204
  else:
218
205
  try:
219
- ratio = float(sp) / _on_demand.get(gt, 50)
220
- pct = int((1 - ratio) * 100)
221
- if ratio < 0.4:
222
- avail_signal = f"[green]High ({pct}% off)[/green]"
223
- elif ratio < 0.7:
224
- avail_signal = f"[yellow]Medium ({pct}% off)[/yellow]"
225
- else:
226
- avail_signal = f"[red]Low ({pct}% off)[/red]"
206
+ disc = f"~{int((1 - float(sp) / _od.get(gt, 50)) * 100)}% off"
227
207
  except (ValueError, TypeError):
228
- avail_signal = "[yellow]Unknown[/yellow]"
229
- st.add_row(f"{gt.upper()} *", ad, str(pn), status, avail_signal)
230
- console.print(st)
231
- console.print("[dim]* = spot: ~70% cheaper, AWS can reclaim with 2-min notice, fulfillment not guaranteed.[/dim]")
232
- console.print("[dim] Separate cluster with separate disks. A node spins up when you reserve.[/dim]")
208
+ disc = "spot price n/a"
209
+ status = (" node up" if avail > 0 else "⚡ spins up ~10min") + f" · {disc}"
210
+ spot_data.append((
211
+ [f"{gt.upper()} *", str(avail), f"{_pn.get(gt, 8)}/node", "-"],
212
+ status, f"spot:{gt}", "spot"))
233
213
 
234
- # Build choices across all 3 sections
235
- choices = []
236
- if full_gpus:
237
- choices.append(questionary.Separator("═══ Full GPUs & CPUs ═══"))
238
- for gt, info in full_gpus.items():
239
- avail = info.get("available", 0)
240
- total = info.get("total", 0)
241
- maint = info.get("maintenance", False)
242
- maint_reason = info.get("maintenance_reason", "")
243
- _, status_indicator = _format_wait(avail, info.get("estimated_wait_minutes", 0))
244
- ql = info.get("queue_length", 0)
245
- if maint:
246
- choices.append(questionary.Choice(
247
- title=f"🔧 {gt.upper()} - MAINTENANCE: {maint_reason}", value=gt, disabled="Under maintenance"))
214
+ # Column widths over the 4 text columns (header + all rows).
215
+ headers = ["GPU Type", "Avail", "MaxRes", "Total"]
216
+ _all_cells = [headers] + [r[0] for r in data_rows] + [s[0] for s in spot_data]
217
+ widths = [max(len(str(row[i])) for row in _all_cells) for i in range(4)]
218
+
219
+ def _fmt(cells, status=""):
220
+ body = " ".join(str(c).ljust(widths[i]) for i, c in enumerate(cells))
221
+ return f"{body} {status}".rstrip()
222
+
223
+ console.print()
224
+ choices = [questionary.Separator(_fmt(headers, "Status"))]
225
+ if not data_rows:
226
+ choices.append(questionary.Separator("(no GPU types available)"))
227
+ for cells, status, value, kind in data_rows:
228
+ title = _fmt(cells, status)
229
+ if kind == "mig":
230
+ choices.append(questionary.Separator(title))
231
+ elif kind == "maint":
232
+ choices.append(questionary.Choice(title=title, value=value, disabled="maintenance"))
248
233
  else:
249
- label = f"{status_indicator} {gt.upper()} ({avail}/{total} available)"
250
- if ql > 0:
251
- label += f" - {ql} in queue"
252
- if gt == "h100" and mig_total_capacity > 0:
253
- label += f" — also {mig_total_available}/{mig_total_capacity} MIG slices"
254
- elif gt == "b200" and b200_mig_capacity > 0:
255
- label += f" — also {b200_mig_avail}/{b200_mig_capacity} MIG slices"
256
- choices.append(questionary.Choice(title=label, value=gt))
257
-
258
- if mig_gpus:
259
- choices.append(questionary.Separator("═══ 🔬 MIG Slices (fractional GPUs) ═══"))
260
- for gt, info in mig_gpus.items():
261
- avail = info.get("available", 0)
262
- total = info.get("total", 0)
263
- _, si = _format_wait(avail, info.get("estimated_wait_minutes", 0))
264
- choices.append(questionary.Choice(
265
- title=f"{si} {gt.upper()} ({avail}/{total} available)", value=gt))
234
+ choices.append(questionary.Choice(title=title, value=value))
266
235
 
267
- if spot_gpus:
268
- _spot_per_node = {"b300": 8, "b200": 8, "h200": 8, "h100": 8, "a100": 8, "t4": 4, "l4": 4}
269
- _on_demand = {"b300": 95, "b200": 95, "h200": 55, "h100": 98, "a100": 32, "t4": 4.5, "l4": 7}
270
- choices.append(questionary.Separator("═══ ⚡ Spot Instances (us-east-1) ═══"))
271
- for gt, info in spot_gpus.items():
272
- avail = info.get("available", 0)
273
- pn = _spot_per_node.get(gt, 8)
274
- si_data = info.get("spot_info", {}) or {}
275
- sp = si_data.get("spot_price", "") if isinstance(si_data, dict) else ""
276
- # Derive availability signal
277
- avail_now = int(info.get("available", 0))
278
- if not sp or "No spot data" in str(si_data.get("spot_signal", "")):
279
- if avail_now > 0:
280
- signal = f"🟢 {avail_now} available now"
281
- else:
282
- continue
283
- else:
284
- try:
285
- ratio = float(sp) / _on_demand.get(gt, 50)
286
- pct = int((1 - ratio) * 100)
287
- if ratio < 0.4: signal = f"🟢 High avail ({pct}% off)"
288
- elif ratio < 0.7: signal = f"🟡 Medium ({pct}% off)"
289
- else: signal = f"🔴 Low ({pct}% off)"
290
- except (ValueError, TypeError):
291
- signal = "availability unknown"
292
- if avail > 0:
293
- label = f"✅ {gt.upper()} * ({avail} free, {pn}/node, {signal})"
294
- else:
295
- label = f"⚡ {gt.upper()} * ({pn} GPUs/node, {signal})"
296
- choices.append(questionary.Choice(title=label, value=f"spot:{gt}"))
236
+ if spot_data:
237
+ choices.append(questionary.Separator(" Spot us-east-1, ~70% cheaper, may be preempted:"))
238
+ for cells, status, value, _kind in spot_data:
239
+ choices.append(questionary.Choice(title=_fmt(cells, status), value=value))
297
240
 
298
241
  choices.append(questionary.Separator("───"))
242
+ if _hide_spot:
243
+ choices.append(questionary.Choice(
244
+ title="⚡ Show spot options (us-east-1, ~70% cheaper, may be preempted)",
245
+ value="_show_spot"))
299
246
  choices.append(questionary.Choice(title="🔄 Refresh availability", value="_refresh"))
300
247
 
301
248
  console.print()
302
249
 
303
- # Interactive selection — loop on refresh
250
+ # Interactive selection — loop on refresh / spot toggle
304
251
  while True:
305
252
  try:
306
253
  answer = questionary.select(
307
- "Select GPU type:", choices=choices, style=custom_style
254
+ "Select GPU type (↑/↓, Enter):", choices=choices, style=custom_style
308
255
  ).ask()
309
256
 
310
257
  if answer == "_refresh":
311
258
  console.print("[dim]Refreshing...[/dim]")
312
- return select_gpu_type_interactive(availability_info, _refresh=True)
259
+ return select_gpu_type_interactive(
260
+ availability_info, _refresh=True, show_spot=show_spot)
261
+ if answer == "_show_spot":
262
+ return select_gpu_type_interactive(availability_info, show_spot=True)
313
263
  return answer
314
264
  except (KeyboardInterrupt, EOFError):
315
265
  console.print("\n[yellow]Selection cancelled.[/yellow]")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpu-dev
3
- Version: 0.7.1
3
+ Version: 0.7.3
4
4
  Summary: CLI + Python SDK for PyTorch GPU developer server reservations
5
5
  Author: PyTorch Team
6
6
  Requires-Python: >=3.10
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "gpu-dev"
7
- version = "0.7.1"
7
+ version = "0.7.3"
8
8
  description = "CLI + Python SDK for PyTorch GPU developer server reservations"
9
9
  authors = [{name = "PyTorch Team"}]
10
10
  readme = "cli-tools/gpu-dev-cli/README.md"
@@ -63,4 +63,4 @@ try:
63
63
 
64
64
  __version__ = _pkg_version("gpu-dev")
65
65
  except Exception:
66
- __version__ = "0.7.1"
66
+ __version__ = "0.7.3"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes