gpu-dev 0.5.5__tar.gz → 0.5.6__tar.gz

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/PKG-INFO +1 -1
  2. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/PKG-INFO +1 -1
  3. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/pyproject.toml +1 -1
  4. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/availability_updater/index.py +8 -3
  5. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda.tf +1 -1
  6. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/.github/workflows/no-gitlinks.yml +0 -0
  7. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/.github/workflows/publish.yml +0 -0
  8. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/.gitignore +0 -0
  9. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/CLAUDE.md +0 -0
  10. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/PROGRESS.md +0 -0
  11. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/PR_DESCRIPTION.md +0 -0
  12. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/TODO.md +0 -0
  13. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/admin/README.md +0 -0
  14. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/admin/generate_stats.py +0 -0
  15. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/admin/requirements.txt +0 -0
  16. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/README.md +0 -0
  17. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/ZERO_CONFIG_SETUP.md +0 -0
  18. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/SOURCES.txt +0 -0
  19. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/dependency_links.txt +0 -0
  20. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/entry_points.txt +0 -0
  21. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/requires.txt +0 -0
  22. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/top_level.txt +0 -0
  23. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/__init__.py +0 -0
  24. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/auth.py +0 -0
  25. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/cli.py +0 -0
  26. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/config.py +0 -0
  27. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/disks.py +0 -0
  28. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/interactive.py +0 -0
  29. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/name_generator.py +0 -0
  30. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/reservations.py +0 -0
  31. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/ssh_proxy.py +0 -0
  32. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/minimal-iam-policy.json +0 -0
  33. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/scripts/clear_stale_disk_locks.py +0 -0
  34. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/docs/USER_GUIDE.md +0 -0
  35. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/docs/devgpu-features.html +0 -0
  36. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/docs/docker-mark-blue.svg +0 -0
  37. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/docs/icons8-cursor-ai.svg +0 -0
  38. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/post.md +0 -0
  39. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/setup.cfg +0 -0
  40. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/.claude/skills/deploy.md +0 -0
  41. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/.terraform.lock.hcl +0 -0
  42. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/README.md +0 -0
  43. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/alb.tf +0 -0
  44. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/availability.tf +0 -0
  45. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/backend.tf +0 -0
  46. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/.dockerignore +0 -0
  47. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/Dockerfile +0 -0
  48. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/backup-dotfiles +0 -0
  49. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/bash_profile +0 -0
  50. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/bashrc +0 -0
  51. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/bashrc_ext +0 -0
  52. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/build-with-efa.sh +0 -0
  53. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/dotfiles-shutdown-handler +0 -0
  54. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/list-dotfile-versions +0 -0
  55. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/motd_script +0 -0
  56. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/nproc_wrapper +0 -0
  57. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/profile +0 -0
  58. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/restore-dotfiles +0 -0
  59. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/restore-dotfiles-version +0 -0
  60. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/setup-dotfiles-persistence +0 -0
  61. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/shell_env +0 -0
  62. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/ssh_config +0 -0
  63. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/zprofile +0 -0
  64. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/zshrc +0 -0
  65. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/zshrc_ext +0 -0
  66. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker-build.tf +0 -0
  67. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker-example/Dockerfile +0 -0
  68. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker-example/hello.txt +0 -0
  69. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/ecr.tf +0 -0
  70. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/efs.tf +0 -0
  71. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/eks.tf +0 -0
  72. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/expiry.tf +0 -0
  73. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/git-cache.tf +0 -0
  74. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/kubernetes.tf +0 -0
  75. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/availability_updater/requirements.txt +0 -0
  76. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py +0 -0
  77. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/reservation_expiry/index.py +0 -0
  78. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt +0 -0
  79. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/reservation_processor/buildkit_job.py +0 -0
  80. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/reservation_processor/index.py +0 -0
  81. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/reservation_processor/requirements.txt +0 -0
  82. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/__init__.py +0 -0
  83. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/alb_utils.py +0 -0
  84. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/dns_utils.py +0 -0
  85. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/k8s_client.py +0 -0
  86. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py +0 -0
  87. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/requirements.txt +0 -0
  88. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/snapshot_utils.py +0 -0
  89. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/main.tf +0 -0
  90. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py +0 -0
  91. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py.bak +0 -0
  92. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/migrations/check_snapshots.py +0 -0
  93. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py +0 -0
  94. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/migrations/run_backfill.sh +0 -0
  95. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/monitoring.tf +0 -0
  96. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/outputs.tf +0 -0
  97. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/pyproject.toml +0 -0
  98. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/queue.tf +0 -0
  99. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/route53.tf +0 -0
  100. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/s3-disk-contents.tf +0 -0
  101. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/scripts/CLEANUP_GUIDE.md +0 -0
  102. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/scripts/detect_empty_volumes.sh +0 -0
  103. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/scripts/ec2_avail_probe.sh +0 -0
  104. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/scripts/inspect_user_data.sh +0 -0
  105. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/ssh-proxy/Dockerfile +0 -0
  106. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/ssh-proxy/proxy.py +0 -0
  107. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/ssh-proxy/requirements.txt +0 -0
  108. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/ssh-proxy-service.tf +0 -0
  109. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/ssh-proxy.tf +0 -0
  110. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/switch-to.sh +0 -0
  111. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/templates/al2023-cpu-user-data.sh +0 -0
  112. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/templates/al2023-user-data.sh +0 -0
  113. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/templates/user-data-self-managed.sh +0 -0
  114. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/templates/user-data.sh +0 -0
  115. {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/variables.tf +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpu-dev
3
- Version: 0.5.5
3
+ Version: 0.5.6
4
4
  Summary: CLI tool for PyTorch GPU developer server reservations
5
5
  Author: PyTorch Team
6
6
  Requires-Python: >=3.10
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpu-dev
3
- Version: 0.5.5
3
+ Version: 0.5.6
4
4
  Summary: CLI tool for PyTorch GPU developer server reservations
5
5
  Author: PyTorch Team
6
6
  Requires-Python: >=3.10
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "gpu-dev"
7
- version = "0.5.5"
7
+ version = "0.5.6"
8
8
  description = "CLI tool for PyTorch GPU developer server reservations"
9
9
  authors = [{name = "PyTorch Team"}]
10
10
  readme = "cli-tools/gpu-dev-cli/README.md"
@@ -515,8 +515,14 @@ def compute_size_etas(v1, gpu_type, node_label_value, resource_name, gpus_per_in
515
515
  pass
516
516
  if gpus > 0:
517
517
  pod_to_info[pod.metadata.name] = (pod.spec.node_name, gpus)
518
-
519
- # 3) Cross-reference active reservations to populate per-node expirations.
518
+ # used_now is the k8s ground-truth — count every running/pending pod, not just those
519
+ # we can match to a reservation row. Otherwise pods without DDB rows look like free GPUs.
520
+ node_state[pod.spec.node_name]["used_now"] += gpus
521
+
522
+ # 3) Cross-reference active reservations to attach expiry timestamps to each known pod.
523
+ # Pods without a matching reservation row keep their GPUs marked as used_now but have no
524
+ # expiration → they're treated as "never expiring" by the simulation, which is the safe
525
+ # fallback (we don't fabricate ETAs for usage we can't trace).
520
526
  target_gpu_type_lower = gpu_type.lower()
521
527
  for r in active_reservations:
522
528
  # Reservations table stores gpu_type uppercased ("H100"); compare case-insensitively.
@@ -534,7 +540,6 @@ def compute_size_etas(v1, gpu_type, node_label_value, resource_name, gpus_per_in
534
540
  except (ValueError, TypeError):
535
541
  continue
536
542
  node_name, gpus = pod_to_info[pod_name]
537
- node_state[node_name]["used_now"] += gpus
538
543
  node_state[node_name]["expirations"].append((ts, gpus))
539
544
 
540
545
  # Sort each node's expirations by time.
@@ -180,7 +180,7 @@ resource "aws_lambda_function" "reservation_processor" {
180
180
  HOSTED_ZONE_ID = local.effective_domain_name != "" ? local.hosted_zone_id : ""
181
181
  SSH_DOMAIN_MAPPINGS_TABLE = local.effective_domain_name != "" ? aws_dynamodb_table.ssh_domain_mappings.name : ""
182
182
  SSL_CERTIFICATE_ARN = local.effective_domain_name != "" ? aws_acm_certificate.wildcard[0].arn : ""
183
- LAMBDA_VERSION = "0.5.5"
183
+ LAMBDA_VERSION = "0.5.6"
184
184
  MIN_CLI_VERSION = "0.5.5"
185
185
  DISK_CONTENTS_BUCKET = aws_s3_bucket.disk_contents.bucket
186
186
  OPERATIONS_TABLE = aws_dynamodb_table.operations.name
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes