gpu-dev 0.5.14__tar.gz → 0.5.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/PKG-INFO +1 -1
  2. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/PKG-INFO +1 -1
  3. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev_cli/auth.py +69 -20
  4. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/pyproject.toml +1 -1
  5. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/.github/workflows/no-gitlinks.yml +0 -0
  6. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/.github/workflows/publish.yml +0 -0
  7. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/.gitignore +0 -0
  8. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/CLAUDE.md +0 -0
  9. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/PROGRESS.md +0 -0
  10. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/PR_DESCRIPTION.md +0 -0
  11. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/TODO.md +0 -0
  12. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/admin/README.md +0 -0
  13. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/admin/generate_stats.py +0 -0
  14. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/admin/requirements.txt +0 -0
  15. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/README.md +0 -0
  16. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/ZERO_CONFIG_SETUP.md +0 -0
  17. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/SOURCES.txt +0 -0
  18. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/dependency_links.txt +0 -0
  19. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/entry_points.txt +0 -0
  20. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/requires.txt +0 -0
  21. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/top_level.txt +0 -0
  22. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev_cli/__init__.py +0 -0
  23. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev_cli/cli.py +0 -0
  24. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev_cli/config.py +0 -0
  25. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev_cli/disks.py +0 -0
  26. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev_cli/interactive.py +0 -0
  27. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev_cli/name_generator.py +0 -0
  28. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev_cli/reservations.py +0 -0
  29. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/gpu_dev_cli/ssh_proxy.py +0 -0
  30. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/gpu-dev-cli/minimal-iam-policy.json +0 -0
  31. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/cli-tools/scripts/clear_stale_disk_locks.py +0 -0
  32. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/docs/USER_GUIDE.md +0 -0
  33. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/docs/devgpu-features.html +0 -0
  34. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/docs/docker-mark-blue.svg +0 -0
  35. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/docs/icons8-cursor-ai.svg +0 -0
  36. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/post.md +0 -0
  37. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/setup.cfg +0 -0
  38. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/.claude/skills/deploy.md +0 -0
  39. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/.terraform.lock.hcl +0 -0
  40. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/README.md +0 -0
  41. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/alb.tf +0 -0
  42. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/availability.tf +0 -0
  43. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/backend.tf +0 -0
  44. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/.dockerignore +0 -0
  45. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/Dockerfile +0 -0
  46. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/backup-dotfiles +0 -0
  47. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/bash_profile +0 -0
  48. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/bashrc +0 -0
  49. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/bashrc_ext +0 -0
  50. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/build-with-efa.sh +0 -0
  51. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/dotfiles-shutdown-handler +0 -0
  52. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/list-dotfile-versions +0 -0
  53. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/motd_script +0 -0
  54. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/nproc_wrapper +0 -0
  55. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/profile +0 -0
  56. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/restore-dotfiles +0 -0
  57. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/restore-dotfiles-version +0 -0
  58. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/setup-dotfiles-persistence +0 -0
  59. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/shell_env +0 -0
  60. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/ssh_config +0 -0
  61. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/zprofile +0 -0
  62. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/zshrc +0 -0
  63. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker/zshrc_ext +0 -0
  64. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker-build.tf +0 -0
  65. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker-example/Dockerfile +0 -0
  66. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/docker-example/hello.txt +0 -0
  67. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/ecr.tf +0 -0
  68. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/efs.tf +0 -0
  69. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/eks.tf +0 -0
  70. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/expiry.tf +0 -0
  71. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/git-cache.tf +0 -0
  72. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/kubernetes.tf +0 -0
  73. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/availability_updater/index.py +0 -0
  74. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/availability_updater/requirements.txt +0 -0
  75. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py +0 -0
  76. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/reservation_expiry/index.py +0 -0
  77. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt +0 -0
  78. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/reservation_processor/buildkit_job.py +0 -0
  79. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/reservation_processor/index.py +0 -0
  80. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/reservation_processor/requirements.txt +0 -0
  81. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/shared/__init__.py +0 -0
  82. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/shared/alb_utils.py +0 -0
  83. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/shared/dns_utils.py +0 -0
  84. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/shared/k8s_client.py +0 -0
  85. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py +0 -0
  86. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/shared/requirements.txt +0 -0
  87. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda/shared/snapshot_utils.py +0 -0
  88. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/lambda.tf +0 -0
  89. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/main.tf +0 -0
  90. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/mig-config.tf +0 -0
  91. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/mig-parted-config.yaml +0 -0
  92. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py +0 -0
  93. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py.bak +0 -0
  94. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/migrations/check_snapshots.py +0 -0
  95. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py +0 -0
  96. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/migrations/run_backfill.sh +0 -0
  97. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/monitoring.tf +0 -0
  98. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/outputs.tf +0 -0
  99. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/pyproject.toml +0 -0
  100. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/queue.tf +0 -0
  101. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/route53.tf +0 -0
  102. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/s3-disk-contents.tf +0 -0
  103. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/scripts/CLEANUP_GUIDE.md +0 -0
  104. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/scripts/detect_empty_volumes.sh +0 -0
  105. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/scripts/ec2_avail_probe.sh +0 -0
  106. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/scripts/inspect_user_data.sh +0 -0
  107. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/ssh-proxy/Dockerfile +0 -0
  108. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/ssh-proxy/proxy.py +0 -0
  109. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/ssh-proxy/requirements.txt +0 -0
  110. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/ssh-proxy-service.tf +0 -0
  111. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/ssh-proxy.tf +0 -0
  112. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/switch-to.sh +0 -0
  113. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/templates/al2023-cpu-user-data.sh +0 -0
  114. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/templates/al2023-user-data.sh +0 -0
  115. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/templates/user-data-self-managed.sh +0 -0
  116. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/templates/user-data.sh +0 -0
  117. {gpu_dev-0.5.14 → gpu_dev-0.5.15}/terraform-gpu-devservers/variables.tf +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpu-dev
3
- Version: 0.5.14
3
+ Version: 0.5.15
4
4
  Summary: CLI tool for PyTorch GPU developer server reservations
5
5
  Author: PyTorch Team
6
6
  Requires-Python: >=3.10
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpu-dev
3
- Version: 0.5.14
3
+ Version: 0.5.15
4
4
  Summary: CLI tool for PyTorch GPU developer server reservations
5
5
  Author: PyTorch Team
6
6
  Requires-Python: >=3.10
@@ -16,6 +16,53 @@ from rich.spinner import Spinner
16
16
  _SSH_CACHE_TTL_SECONDS = 24 * 60 * 60
17
17
  _SSH_CACHE_PATH = Path(os.path.expanduser("~/.config/gpu-dev/ssh-validation-cache.json"))
18
18
 
19
+ # Cache for authenticate_user. STS GetCallerIdentity is stable per AWS profile and slow under SSO
20
+ # (~500ms-1.5s). Cache for 24h keyed by AWS_PROFILE; if creds rotate the user_id rarely changes,
21
+ # and the next AWS call (DDB/SQS) will surface a credential error if it does.
22
+ _AUTH_CACHE_TTL_SECONDS = 24 * 60 * 60
23
+ _AUTH_CACHE_PATH = Path(os.path.expanduser("~/.config/gpu-dev/auth-cache.json"))
24
+
25
+
26
+ def _auth_cache_key() -> str:
27
+ return os.environ.get("AWS_PROFILE", "default")
28
+
29
+
30
+ def _load_auth_cache(github_user: str) -> Optional[Dict[str, Any]]:
31
+ try:
32
+ if not _AUTH_CACHE_PATH.exists():
33
+ return None
34
+ with open(_AUTH_CACHE_PATH) as f:
35
+ data = json.load(f)
36
+ entry = data.get(_auth_cache_key())
37
+ if not entry or entry.get("github_user") != github_user:
38
+ return None
39
+ if time.time() - float(entry.get("ts", 0)) > _AUTH_CACHE_TTL_SECONDS:
40
+ return None
41
+ return entry.get("result")
42
+ except Exception:
43
+ return None
44
+
45
+
46
+ def _save_auth_cache(github_user: str, result: Dict[str, Any]) -> None:
47
+ try:
48
+ _AUTH_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
49
+ data = {}
50
+ if _AUTH_CACHE_PATH.exists():
51
+ try:
52
+ with open(_AUTH_CACHE_PATH) as f:
53
+ data = json.load(f)
54
+ except Exception:
55
+ data = {}
56
+ data[_auth_cache_key()] = {
57
+ "github_user": github_user,
58
+ "ts": int(time.time()),
59
+ "result": result,
60
+ }
61
+ with open(_AUTH_CACHE_PATH, "w") as f:
62
+ json.dump(data, f)
63
+ except Exception:
64
+ pass
65
+
19
66
 
20
67
  def _load_ssh_cache(github_user: str) -> Optional[Dict[str, Any]]:
21
68
  """Return cached validation if it's fresh and matches the configured github_user, else None."""
@@ -50,31 +97,33 @@ def _save_ssh_cache(github_user: str, result: Dict[str, Any]) -> None:
50
97
 
51
98
 
52
99
  def authenticate_user(config: Config) -> Dict[str, Any]:
53
- """Authenticate using AWS credentials - if you can call AWS, you're authorized"""
54
- try:
55
- # Test AWS access by getting caller identity
56
- identity = config.get_user_identity()
100
+ """Authenticate using AWS credentials - if you can call AWS, you're authorized.
57
101
 
58
- # Test specific resource access by trying to get queue URL
59
- config.get_queue_url()
60
-
61
- # Extract user info from AWS ARN
62
- arn = identity["arn"]
63
- user_name = arn.split("/")[-1] # Extract username from ARN
102
+ Cached for 24h per AWS profile. The previous SQS get_queue_url probe was dropped:
103
+ it's a redundant permission check; reserve/cancel call SQS directly and surface
104
+ failures themselves, while list/show/avail don't touch SQS at all.
105
+ """
106
+ github_user = config.get_github_username()
107
+ if not github_user:
108
+ raise RuntimeError(
109
+ "GitHub username not configured. Please run: gpu-dev config set github_user <your-github-username>"
110
+ )
64
111
 
65
- # Get GitHub username from config
66
- github_user = config.get_github_username()
67
- if not github_user:
68
- raise RuntimeError(
69
- f"GitHub username not configured. Please run: gpu-dev config set github_user <your-github-username>"
70
- )
112
+ cached = _load_auth_cache(github_user)
113
+ if cached is not None:
114
+ return cached
71
115
 
72
- return {
73
- "user_id": user_name, # AWS username for reservation ownership
74
- "github_user": github_user, # GitHub username for SSH keys
116
+ try:
117
+ identity = config.get_user_identity()
118
+ arn = identity["arn"]
119
+ user_name = arn.split("/")[-1]
120
+ result = {
121
+ "user_id": user_name,
122
+ "github_user": github_user,
75
123
  "arn": arn,
76
124
  }
77
-
125
+ _save_auth_cache(github_user, result)
126
+ return result
78
127
  except Exception as e:
79
128
  raise RuntimeError(f"AWS authentication failed: {e}")
80
129
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "gpu-dev"
7
- version = "0.5.14"
7
+ version = "0.5.15"
8
8
  description = "CLI tool for PyTorch GPU developer server reservations"
9
9
  authors = [{name = "PyTorch Team"}]
10
10
  readme = "cli-tools/gpu-dev-cli/README.md"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes