gpu-dev 0.5.23__tar.gz → 0.5.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/PKG-INFO +1 -1
  2. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/PKG-INFO +1 -1
  3. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev_cli/auth.py +13 -0
  4. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/pyproject.toml +1 -1
  5. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/reservation_processor/index.py +6 -1
  6. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda.tf +1 -1
  7. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/.github/workflows/no-gitlinks.yml +0 -0
  8. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/.github/workflows/publish.yml +0 -0
  9. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/.gitignore +0 -0
  10. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/CLAUDE.md +0 -0
  11. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/PROGRESS.md +0 -0
  12. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/PR_DESCRIPTION.md +0 -0
  13. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/README.md +0 -0
  14. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/TODO.md +0 -0
  15. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/admin/README.md +0 -0
  16. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/admin/generate_stats.py +0 -0
  17. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/admin/requirements.txt +0 -0
  18. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/README.md +0 -0
  19. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/ZERO_CONFIG_SETUP.md +0 -0
  20. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/SOURCES.txt +0 -0
  21. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/dependency_links.txt +0 -0
  22. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/entry_points.txt +0 -0
  23. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/requires.txt +0 -0
  24. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/top_level.txt +0 -0
  25. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev_cli/__init__.py +0 -0
  26. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev_cli/cli.py +0 -0
  27. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev_cli/config.py +0 -0
  28. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev_cli/disks.py +0 -0
  29. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev_cli/interactive.py +0 -0
  30. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev_cli/name_generator.py +0 -0
  31. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev_cli/reservations.py +0 -0
  32. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/gpu_dev_cli/ssh_proxy.py +0 -0
  33. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/gpu-dev-cli/minimal-iam-policy.json +0 -0
  34. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/cli-tools/scripts/clear_stale_disk_locks.py +0 -0
  35. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/docs/USER_GUIDE.md +0 -0
  36. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/docs/devgpu-features.html +0 -0
  37. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/docs/docker-mark-blue.svg +0 -0
  38. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/docs/icons8-cursor-ai.svg +0 -0
  39. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/post.md +0 -0
  40. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/setup.cfg +0 -0
  41. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/.claude/skills/deploy.md +0 -0
  42. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/.terraform.lock.hcl +0 -0
  43. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/README.md +0 -0
  44. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/alb.tf +0 -0
  45. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/availability.tf +0 -0
  46. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/backend.tf +0 -0
  47. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/.dockerignore +0 -0
  48. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/Dockerfile +0 -0
  49. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/backup-dotfiles +0 -0
  50. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/bash_profile +0 -0
  51. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/bashrc +0 -0
  52. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/bashrc_ext +0 -0
  53. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/build-with-efa.sh +0 -0
  54. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/dotfiles-shutdown-handler +0 -0
  55. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/list-dotfile-versions +0 -0
  56. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/motd_script +0 -0
  57. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/nproc_wrapper +0 -0
  58. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/profile +0 -0
  59. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/restore-dotfiles +0 -0
  60. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/restore-dotfiles-version +0 -0
  61. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/setup-dotfiles-persistence +0 -0
  62. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/shell_env +0 -0
  63. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/ssh_config +0 -0
  64. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/zprofile +0 -0
  65. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/zshrc +0 -0
  66. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker/zshrc_ext +0 -0
  67. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker-build.tf +0 -0
  68. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker-example/Dockerfile +0 -0
  69. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/docker-example/hello.txt +0 -0
  70. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/ecr.tf +0 -0
  71. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/efs.tf +0 -0
  72. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/eks.tf +0 -0
  73. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/expiry.tf +0 -0
  74. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/git-cache.tf +0 -0
  75. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/gpu-dev-pod-irsa.tf +0 -0
  76. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/kubernetes.tf +0 -0
  77. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/availability_updater/index.py +0 -0
  78. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/availability_updater/requirements.txt +0 -0
  79. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py +0 -0
  80. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/reservation_expiry/index.py +0 -0
  81. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt +0 -0
  82. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/reservation_processor/buildkit_job.py +0 -0
  83. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/reservation_processor/requirements.txt +0 -0
  84. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/shared/__init__.py +0 -0
  85. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/shared/alb_utils.py +0 -0
  86. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/shared/dns_utils.py +0 -0
  87. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/shared/k8s_client.py +0 -0
  88. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py +0 -0
  89. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/shared/requirements.txt +0 -0
  90. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/lambda/shared/snapshot_utils.py +0 -0
  91. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/main.tf +0 -0
  92. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/mig-config.tf +0 -0
  93. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/mig-parted-config.yaml +0 -0
  94. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py +0 -0
  95. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py.bak +0 -0
  96. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/migrations/check_snapshots.py +0 -0
  97. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py +0 -0
  98. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/migrations/run_backfill.sh +0 -0
  99. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/monitoring.tf +0 -0
  100. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/outputs.tf +0 -0
  101. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/pyproject.toml +0 -0
  102. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/queue.tf +0 -0
  103. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/route53.tf +0 -0
  104. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/s3-disk-contents.tf +0 -0
  105. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/scripts/CLEANUP_GUIDE.md +0 -0
  106. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/scripts/detect_empty_volumes.sh +0 -0
  107. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/scripts/ec2_avail_probe.sh +0 -0
  108. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/scripts/inspect_user_data.sh +0 -0
  109. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/ssh-proxy/Dockerfile +0 -0
  110. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/ssh-proxy/proxy.py +0 -0
  111. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/ssh-proxy/requirements.txt +0 -0
  112. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/ssh-proxy-service.tf +0 -0
  113. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/ssh-proxy.tf +0 -0
  114. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/switch-to.sh +0 -0
  115. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/templates/al2023-cpu-user-data.sh +0 -0
  116. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/templates/al2023-user-data.sh +0 -0
  117. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/templates/user-data-self-managed.sh +0 -0
  118. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/templates/user-data.sh +0 -0
  119. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/terraform-gpu-devservers/variables.tf +0 -0
  120. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/tests/submit/README.md +0 -0
  121. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/tests/submit/fail/run.sh +0 -0
  122. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/tests/submit/multinode/run.sh +0 -0
  123. {gpu_dev-0.5.23 → gpu_dev-0.5.24}/tests/submit/success/run.sh +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpu-dev
3
- Version: 0.5.23
3
+ Version: 0.5.24
4
4
  Summary: CLI tool for PyTorch GPU developer server reservations
5
5
  Author: PyTorch Team
6
6
  Requires-Python: >=3.10
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpu-dev
3
- Version: 0.5.23
3
+ Version: 0.5.24
4
4
  Summary: CLI tool for PyTorch GPU developer server reservations
5
5
  Author: PyTorch Team
6
6
  Requires-Python: >=3.10
@@ -38,6 +38,19 @@ def _load_auth_cache(github_user: str) -> Optional[Dict[str, Any]]:
38
38
  return None
39
39
  if time.time() - float(entry.get("ts", 0)) > _AUTH_CACHE_TTL_SECONDS:
40
40
  return None
41
+ # Defense against stale cache on a persistent disk that pre-dates the IRSA fix:
42
+ # if AWS_ROLE_ARN points at a role the cached ARN doesn't reference, the cache
43
+ # is from a different identity (e.g. IMDS-fallback before fs_group=1081 landed)
44
+ # and should be ignored.
45
+ expected_role_arn = os.environ.get("AWS_ROLE_ARN", "")
46
+ cached_arn = (entry.get("result") or {}).get("arn", "")
47
+ if expected_role_arn:
48
+ try:
49
+ role_name = expected_role_arn.rsplit("/", 1)[-1]
50
+ if role_name and role_name not in cached_arn:
51
+ return None
52
+ except Exception:
53
+ pass
41
54
  return entry.get("result")
42
55
  except Exception:
43
56
  return None
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "gpu-dev"
7
- version = "0.5.23"
7
+ version = "0.5.24"
8
8
  description = "CLI tool for PyTorch GPU developer server reservations"
9
9
  authors = [{name = "PyTorch Team"}]
10
10
  readme = "cli-tools/gpu-dev-cli/README.md"
@@ -2888,6 +2888,7 @@ def allocate_gpu_resources(reservation_id: str, request: dict[str, Any], trace_d
2888
2888
  gpu_count=gpu_count,
2889
2889
  gpu_type=gpu_type,
2890
2890
  github_public_key=github_public_key,
2891
+ github_user=github_user,
2891
2892
  reservation_id=reservation_id,
2892
2893
  jupyter_enabled=jupyter_enabled,
2893
2894
  persistent_volume_id=persistent_volume_id,
@@ -3434,7 +3435,8 @@ def create_kubernetes_resources(
3434
3435
  gpu_count: int,
3435
3436
  gpu_type: str,
3436
3437
  github_public_key: str,
3437
- reservation_id: str,
3438
+ github_user: str = "",
3439
+ reservation_id: str = None,
3438
3440
  jupyter_enabled: bool = False,
3439
3441
  persistent_volume_id: str = None,
3440
3442
  user_id: str = None,
@@ -3538,6 +3540,7 @@ def create_kubernetes_resources(
3538
3540
  gpu_count,
3539
3541
  gpu_type,
3540
3542
  github_public_key,
3543
+ github_user=github_user,
3541
3544
  jupyter_enabled=True,
3542
3545
  persistent_volume_id=persistent_volume_id,
3543
3546
  user_id=user_id,
@@ -3627,6 +3630,7 @@ def create_kubernetes_resources(
3627
3630
  gpu_count,
3628
3631
  gpu_type,
3629
3632
  github_public_key,
3633
+ github_user=github_user,
3630
3634
  jupyter_enabled=False,
3631
3635
  persistent_volume_id=persistent_volume_id,
3632
3636
  user_id=user_id,
@@ -3979,6 +3983,7 @@ def create_pod(
3979
3983
  gpu_count: int,
3980
3984
  gpu_type: str,
3981
3985
  github_public_key: str,
3986
+ github_user: str = "",
3982
3987
  jupyter_enabled: bool = False,
3983
3988
  persistent_volume_id: str = None,
3984
3989
  user_id: str = None,
@@ -180,7 +180,7 @@ resource "aws_lambda_function" "reservation_processor" {
180
180
  HOSTED_ZONE_ID = local.effective_domain_name != "" ? local.hosted_zone_id : ""
181
181
  SSH_DOMAIN_MAPPINGS_TABLE = local.effective_domain_name != "" ? aws_dynamodb_table.ssh_domain_mappings.name : ""
182
182
  SSL_CERTIFICATE_ARN = local.effective_domain_name != "" ? aws_acm_certificate.wildcard[0].arn : ""
183
- LAMBDA_VERSION = "0.5.24"
183
+ LAMBDA_VERSION = "0.5.25"
184
184
  MIN_CLI_VERSION = "0.5.16"
185
185
  DISK_CONTENTS_BUCKET = aws_s3_bucket.disk_contents.bucket
186
186
  OPERATIONS_TABLE = aws_dynamodb_table.operations.name
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes