gpu-dev 0.5.30__tar.gz → 0.5.32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/CLAUDE.md +52 -0
  2. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/PKG-INFO +1 -1
  3. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/PKG-INFO +1 -1
  4. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/SOURCES.txt +7 -4
  5. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/cli.py +316 -119
  6. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/config.py +1 -0
  7. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/interactive.py +217 -78
  8. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/reservations.py +43 -4
  9. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/ssh_proxy.py +4 -2
  10. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/pyproject.toml +1 -1
  11. gpu_dev-0.5.32/terraform-gpu-devservers/ami-baker.tf +144 -0
  12. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/availability.tf +11 -1
  13. gpu_dev-0.5.32/terraform-gpu-devservers/check_b200.py +21 -0
  14. gpu_dev-0.5.32/terraform-gpu-devservers/cluster-autoscaler.tf +47 -0
  15. gpu_dev-0.5.32/terraform-gpu-devservers/cmd_proxy.py +49 -0
  16. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/Dockerfile +9 -9
  17. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/ecr.tf +73 -4
  18. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/eks.tf +90 -11
  19. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/expiry.tf +3 -0
  20. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/gpu-dev-pod-irsa.tf +31 -0
  21. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/kubernetes.tf +13 -13
  22. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/availability_updater/index.py +108 -18
  23. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/reservation_expiry/index.py +33 -3
  24. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/reservation_processor/index.py +210 -27
  25. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda.tf +9 -3
  26. gpu_dev-0.5.32/terraform-gpu-devservers/list_b200.py +68 -0
  27. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/main.tf +29 -8
  28. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/ssh-proxy-service.tf +8 -7
  29. gpu_dev-0.5.32/terraform-gpu-devservers/subnet-0fe3a2c45570091ad +0 -0
  30. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/templates/al2023-user-data.sh +16 -7
  31. gpu_dev-0.5.32/terraform-gpu-devservers/templates/ami-baker-user-data.sh +45 -0
  32. gpu_dev-0.5.30/PROGRESS.md +0 -288
  33. gpu_dev-0.5.30/PR_DESCRIPTION.md +0 -168
  34. gpu_dev-0.5.30/TODO.md +0 -64
  35. gpu_dev-0.5.30/post.md +0 -233
  36. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/.github/workflows/no-gitlinks.yml +0 -0
  37. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/.github/workflows/publish.yml +0 -0
  38. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/.gitignore +0 -0
  39. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/README.md +0 -0
  40. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/admin/README.md +0 -0
  41. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/admin/generate_stats.py +0 -0
  42. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/admin/requirements.txt +0 -0
  43. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/README.md +0 -0
  44. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/ZERO_CONFIG_SETUP.md +0 -0
  45. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/dependency_links.txt +0 -0
  46. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/entry_points.txt +0 -0
  47. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/requires.txt +0 -0
  48. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/top_level.txt +0 -0
  49. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/__init__.py +0 -0
  50. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/auth.py +0 -0
  51. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/disks.py +0 -0
  52. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/name_generator.py +0 -0
  53. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/minimal-iam-policy.json +0 -0
  54. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/scripts/clear_stale_disk_locks.py +0 -0
  55. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/docs/USER_GUIDE.md +0 -0
  56. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/docs/devgpu-features.html +0 -0
  57. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/docs/docker-mark-blue.svg +0 -0
  58. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/docs/icons8-cursor-ai.svg +0 -0
  59. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/setup.cfg +0 -0
  60. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/.claude/skills/deploy.md +0 -0
  61. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/.terraform.lock.hcl +0 -0
  62. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/README.md +0 -0
  63. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/alb.tf +0 -0
  64. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/backend.tf +0 -0
  65. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/.dockerignore +0 -0
  66. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/backup-dotfiles +0 -0
  67. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/bash_profile +0 -0
  68. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/bashrc +0 -0
  69. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/bashrc_ext +0 -0
  70. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/build-with-efa.sh +0 -0
  71. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/dotfiles-shutdown-handler +0 -0
  72. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/list-dotfile-versions +0 -0
  73. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/motd_script +0 -0
  74. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/nproc_wrapper +0 -0
  75. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/profile +0 -0
  76. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/restore-dotfiles +0 -0
  77. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/restore-dotfiles-version +0 -0
  78. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/setup-dotfiles-persistence +0 -0
  79. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/shell_env +0 -0
  80. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/ssh_config +0 -0
  81. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/zprofile +0 -0
  82. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/zshrc +0 -0
  83. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/zshrc_ext +0 -0
  84. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker-build.tf +0 -0
  85. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker-example/Dockerfile +0 -0
  86. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker-example/hello.txt +0 -0
  87. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/efs.tf +0 -0
  88. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/git-cache.tf +0 -0
  89. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/availability_updater/requirements.txt +0 -0
  90. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py +0 -0
  91. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt +0 -0
  92. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/reservation_processor/buildkit_job.py +0 -0
  93. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/reservation_processor/requirements.txt +0 -0
  94. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/__init__.py +0 -0
  95. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/alb_utils.py +0 -0
  96. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/dns_utils.py +0 -0
  97. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/k8s_client.py +0 -0
  98. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py +0 -0
  99. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/requirements.txt +0 -0
  100. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/snapshot_utils.py +0 -0
  101. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/mig-config.tf +0 -0
  102. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/mig-parted-config.yaml +0 -0
  103. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py +0 -0
  104. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py.bak +0 -0
  105. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/migrations/check_snapshots.py +0 -0
  106. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py +0 -0
  107. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/migrations/run_backfill.sh +0 -0
  108. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/monitoring.tf +0 -0
  109. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/node-termination-handler.tf +0 -0
  110. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/outputs.tf +0 -0
  111. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/pyproject.toml +0 -0
  112. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/queue.tf +0 -0
  113. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/route53.tf +0 -0
  114. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/s3-disk-contents.tf +0 -0
  115. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/scripts/CLEANUP_GUIDE.md +0 -0
  116. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/scripts/detect_empty_volumes.sh +0 -0
  117. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/scripts/ec2_avail_probe.sh +0 -0
  118. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/scripts/inspect_user_data.sh +0 -0
  119. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/ssh-proxy/Dockerfile +0 -0
  120. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/ssh-proxy/proxy.py +0 -0
  121. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/ssh-proxy/requirements.txt +0 -0
  122. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/ssh-proxy.tf +0 -0
  123. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/switch-to.sh +0 -0
  124. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/templates/al2023-cpu-user-data.sh +0 -0
  125. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/templates/user-data-self-managed.sh +0 -0
  126. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/templates/user-data.sh +0 -0
  127. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/variables.tf +0 -0
  128. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/tests/submit/README.md +0 -0
  129. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/tests/submit/fail/run.sh +0 -0
  130. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/tests/submit/multinode/run.sh +0 -0
  131. {gpu_dev-0.5.30 → gpu_dev-0.5.32}/tests/submit/success/run.sh +0 -0
@@ -183,6 +183,55 @@ kubectl port-forward -n monitoring svc/kube-prometheus-stack-prometheus 9090:909
183
183
  kubectl get pods -n monitoring -l app.kubernetes.io/name=grafana
184
184
  ```
185
185
 
186
+ ## Multi-Region Single-State Refactor (Research Notes, May 2026)
187
+
188
+ **Goal:** One `tf apply` manages all regions. No more `tf-all`, no double Docker builds, no double AMI bakes.
189
+
190
+ **Approach:** Module-per-region pattern.
191
+ ```hcl
192
+ # root main.tf
193
+ module "us_east_2" {
194
+ source = "./modules/region"
195
+ region = "us-east-2"
196
+ gpu_types = { h100 = {...}, b200 = {...}, ... }
197
+ spot_types = []
198
+ providers = { aws = aws.us_east_2 }
199
+ }
200
+ module "us_east_1" {
201
+ source = "./modules/region"
202
+ region = "us-east-1"
203
+ gpu_types = { b300 = {...}, t4 = {...}, ... }
204
+ spot_types = ["b300", "b200", "h100", ...]
205
+ providers = { aws = aws.us_east_1 }
206
+ }
207
+ ```
208
+
209
+ **What goes in the module:** VPC, subnets, EKS cluster, ASGs, launch templates, Lambda functions, DDB tables, EFS, monitoring, DNS. Basically everything in the current root except provider config and shared resources.
210
+
211
+ **What stays at root:** Provider blocks with aliases, ECR replication config, AMI copy (`aws_ami_copy` from primary to secondary regions), global IAM roles if any, CLI config.
212
+
213
+ **AMI sharing:** Build baked AMI in us-east-2 (primary), `aws_ami_copy` to other regions. One build, replicated. The `ami_baker` stays in root, outputs AMI ID, each module receives it as a variable.
214
+
215
+ **Docker sharing:** ECR replication already set up. Docker builds once in primary region, auto-replicates.
216
+
217
+ **Migration plan (since nobody uses east1 yet):**
218
+ 1. `tofu workspace select prod-east1 && tofu destroy` — clean slate
219
+ 2. Move all resources into `modules/region/`
220
+ 3. Create provider aliases in root
221
+ 4. Import prod (us-east-2) resources into new module state: `tofu import module.us_east_2.aws_vpc.gpu_dev_vpc vpc-xxx`
222
+ 5. Add us-east-1 module — fresh create, no import needed
223
+ 6. Delete workspace: `tofu workspace delete prod-east1`
224
+
225
+ **Risks:**
226
+ - Import step for prod is tedious (~50+ resources) but mechanical
227
+ - Lambda zip paths need to be relative to module, not root
228
+ - EKS auth (aws-auth ConfigMap) is per-cluster — each module manages its own
229
+ - CLI needs to know which region to query — already handled by config
230
+
231
+ **Estimated effort:** 1 dedicated session (~4-6 hours). Most of the time will go to the module extraction and the prod import.
232
+
233
+ **Prerequisite for:** Adding us-west-1, us-west-2, or any future region (becomes one module block each).
234
+
186
235
  ## Recent Fixes (Oct 27, 2025)
187
236
 
188
237
  **NVIDIA Profiling Bootstrap Configuration (Oct 27, 2025):**
@@ -232,6 +281,9 @@ kubectl get pods -n monitoring -l app.kubernetes.io/name=grafana
232
281
 
233
282
  ### 📋 Remaining Tasks
234
283
 
284
+ - **Merge multi-region into single tf state** - HIGH PRIORITY. Kill prod-east1 workspace, refactor into module-per-region in one state. See the "Multi-Region Single-State Refactor" research notes above. Enables: one `tf apply`, shared AMI (aws_ami_copy), shared Docker (ECR replication already set up), no double builds. Prerequisite for adding west regions.
285
+ - **Add us-west-1 and us-west-2 spot regions** - BLOCKED on single-state refactor. After refactor, adding a region = adding one module block.
286
+ - **Spot UX improvements** - Queue position should be computed per GPU type, so the first request waiting for each type shows #1 (not a cross-type FIFO position). Status should show "queued (waiting for capacity)", not just "queued". Interactive picker should show spot GPU counts from east1, not prod.
235
287
  - **FQDN for devservers** - Set up proper domain names for development server access
236
288
  - **Automated SSH config per reservation** - ✅ DONE - Each reservation now gets `~/.devgpu/<reservation_id>-sshconfig` file, use with `ssh -F ~/.devgpu/<reservation_id>-sshconfig <pod_name>`
237
289
  - **Custom Docker image scaffold** - Create Dockerfile with pre-installed packages (Jupyter, etc.)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpu-dev
3
- Version: 0.5.30
3
+ Version: 0.5.32
4
4
  Summary: CLI tool for PyTorch GPU developer server reservations
5
5
  Author: PyTorch Team
6
6
  Requires-Python: >=3.10
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpu-dev
3
- Version: 0.5.30
3
+ Version: 0.5.32
4
4
  Summary: CLI tool for PyTorch GPU developer server reservations
5
5
  Author: PyTorch Team
6
6
  Requires-Python: >=3.10
@@ -1,10 +1,6 @@
1
1
  .gitignore
2
2
  CLAUDE.md
3
- PROGRESS.md
4
- PR_DESCRIPTION.md
5
3
  README.md
6
- TODO.md
7
- post.md
8
4
  pyproject.toml
9
5
  .github/workflows/no-gitlinks.yml
10
6
  .github/workflows/publish.yml
@@ -37,8 +33,12 @@ docs/icons8-cursor-ai.svg
37
33
  terraform-gpu-devservers/.terraform.lock.hcl
38
34
  terraform-gpu-devservers/README.md
39
35
  terraform-gpu-devservers/alb.tf
36
+ terraform-gpu-devservers/ami-baker.tf
40
37
  terraform-gpu-devservers/availability.tf
41
38
  terraform-gpu-devservers/backend.tf
39
+ terraform-gpu-devservers/check_b200.py
40
+ terraform-gpu-devservers/cluster-autoscaler.tf
41
+ terraform-gpu-devservers/cmd_proxy.py
42
42
  terraform-gpu-devservers/docker-build.tf
43
43
  terraform-gpu-devservers/ecr.tf
44
44
  terraform-gpu-devservers/efs.tf
@@ -48,6 +48,7 @@ terraform-gpu-devservers/git-cache.tf
48
48
  terraform-gpu-devservers/gpu-dev-pod-irsa.tf
49
49
  terraform-gpu-devservers/kubernetes.tf
50
50
  terraform-gpu-devservers/lambda.tf
51
+ terraform-gpu-devservers/list_b200.py
51
52
  terraform-gpu-devservers/main.tf
52
53
  terraform-gpu-devservers/mig-config.tf
53
54
  terraform-gpu-devservers/mig-parted-config.yaml
@@ -60,6 +61,7 @@ terraform-gpu-devservers/route53.tf
60
61
  terraform-gpu-devservers/s3-disk-contents.tf
61
62
  terraform-gpu-devservers/ssh-proxy-service.tf
62
63
  terraform-gpu-devservers/ssh-proxy.tf
64
+ terraform-gpu-devservers/subnet-0fe3a2c45570091ad
63
65
  terraform-gpu-devservers/switch-to.sh
64
66
  terraform-gpu-devservers/variables.tf
65
67
  terraform-gpu-devservers/.claude/skills/deploy.md
@@ -114,6 +116,7 @@ terraform-gpu-devservers/ssh-proxy/proxy.py
114
116
  terraform-gpu-devservers/ssh-proxy/requirements.txt
115
117
  terraform-gpu-devservers/templates/al2023-cpu-user-data.sh
116
118
  terraform-gpu-devservers/templates/al2023-user-data.sh
119
+ terraform-gpu-devservers/templates/ami-baker-user-data.sh
117
120
  terraform-gpu-devservers/templates/user-data-self-managed.sh
118
121
  terraform-gpu-devservers/templates/user-data.sh
119
122
  tests/submit/README.md