gpu-dev 0.5.30__tar.gz → 0.5.32__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/CLAUDE.md +52 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/PKG-INFO +1 -1
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/PKG-INFO +1 -1
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/SOURCES.txt +7 -4
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/cli.py +316 -119
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/config.py +1 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/interactive.py +217 -78
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/reservations.py +43 -4
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/ssh_proxy.py +4 -2
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/pyproject.toml +1 -1
- gpu_dev-0.5.32/terraform-gpu-devservers/ami-baker.tf +144 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/availability.tf +11 -1
- gpu_dev-0.5.32/terraform-gpu-devservers/check_b200.py +21 -0
- gpu_dev-0.5.32/terraform-gpu-devservers/cluster-autoscaler.tf +47 -0
- gpu_dev-0.5.32/terraform-gpu-devservers/cmd_proxy.py +49 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/Dockerfile +9 -9
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/ecr.tf +73 -4
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/eks.tf +90 -11
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/expiry.tf +3 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/gpu-dev-pod-irsa.tf +31 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/kubernetes.tf +13 -13
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/availability_updater/index.py +108 -18
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/reservation_expiry/index.py +33 -3
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/reservation_processor/index.py +210 -27
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda.tf +9 -3
- gpu_dev-0.5.32/terraform-gpu-devservers/list_b200.py +68 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/main.tf +29 -8
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/ssh-proxy-service.tf +8 -7
- gpu_dev-0.5.32/terraform-gpu-devservers/subnet-0fe3a2c45570091ad +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/templates/al2023-user-data.sh +16 -7
- gpu_dev-0.5.32/terraform-gpu-devservers/templates/ami-baker-user-data.sh +45 -0
- gpu_dev-0.5.30/PROGRESS.md +0 -288
- gpu_dev-0.5.30/PR_DESCRIPTION.md +0 -168
- gpu_dev-0.5.30/TODO.md +0 -64
- gpu_dev-0.5.30/post.md +0 -233
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/.github/workflows/no-gitlinks.yml +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/.github/workflows/publish.yml +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/.gitignore +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/README.md +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/admin/README.md +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/admin/generate_stats.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/admin/requirements.txt +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/README.md +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/ZERO_CONFIG_SETUP.md +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/dependency_links.txt +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/entry_points.txt +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/requires.txt +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/top_level.txt +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/__init__.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/auth.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/disks.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/gpu_dev_cli/name_generator.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/gpu-dev-cli/minimal-iam-policy.json +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/cli-tools/scripts/clear_stale_disk_locks.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/docs/USER_GUIDE.md +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/docs/devgpu-features.html +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/docs/docker-mark-blue.svg +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/docs/icons8-cursor-ai.svg +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/setup.cfg +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/.claude/skills/deploy.md +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/.terraform.lock.hcl +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/README.md +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/alb.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/backend.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/.dockerignore +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/backup-dotfiles +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/bash_profile +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/bashrc +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/bashrc_ext +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/build-with-efa.sh +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/dotfiles-shutdown-handler +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/list-dotfile-versions +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/motd_script +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/nproc_wrapper +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/profile +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/restore-dotfiles +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/restore-dotfiles-version +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/setup-dotfiles-persistence +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/shell_env +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/ssh_config +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/zprofile +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/zshrc +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker/zshrc_ext +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker-build.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker-example/Dockerfile +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/docker-example/hello.txt +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/efs.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/git-cache.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/availability_updater/requirements.txt +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/reservation_processor/buildkit_job.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/reservation_processor/requirements.txt +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/__init__.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/alb_utils.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/dns_utils.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/k8s_client.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/requirements.txt +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/lambda/shared/snapshot_utils.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/mig-config.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/mig-parted-config.yaml +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py.bak +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/migrations/check_snapshots.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/migrations/run_backfill.sh +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/monitoring.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/node-termination-handler.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/outputs.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/pyproject.toml +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/queue.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/route53.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/s3-disk-contents.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/scripts/CLEANUP_GUIDE.md +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/scripts/detect_empty_volumes.sh +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/scripts/ec2_avail_probe.sh +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/scripts/inspect_user_data.sh +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/ssh-proxy/Dockerfile +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/ssh-proxy/proxy.py +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/ssh-proxy/requirements.txt +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/ssh-proxy.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/switch-to.sh +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/templates/al2023-cpu-user-data.sh +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/templates/user-data-self-managed.sh +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/templates/user-data.sh +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/terraform-gpu-devservers/variables.tf +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/tests/submit/README.md +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/tests/submit/fail/run.sh +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/tests/submit/multinode/run.sh +0 -0
- {gpu_dev-0.5.30 → gpu_dev-0.5.32}/tests/submit/success/run.sh +0 -0
|
@@ -183,6 +183,55 @@ kubectl port-forward -n monitoring svc/kube-prometheus-stack-prometheus 9090:909
|
|
|
183
183
|
kubectl get pods -n monitoring -l app.kubernetes.io/name=grafana
|
|
184
184
|
```
|
|
185
185
|
|
|
186
|
+
## Multi-Region Single-State Refactor (Research Notes, May 2026)
|
|
187
|
+
|
|
188
|
+
**Goal:** One `tf apply` manages all regions. No more `tf-all`, no double Docker builds, no double AMI bakes.
|
|
189
|
+
|
|
190
|
+
**Approach:** Module-per-region pattern.
|
|
191
|
+
```hcl
|
|
192
|
+
# root main.tf
|
|
193
|
+
module "us_east_2" {
|
|
194
|
+
source = "./modules/region"
|
|
195
|
+
region = "us-east-2"
|
|
196
|
+
gpu_types = { h100 = {...}, b200 = {...}, ... }
|
|
197
|
+
spot_types = []
|
|
198
|
+
providers = { aws = aws.us_east_2 }
|
|
199
|
+
}
|
|
200
|
+
module "us_east_1" {
|
|
201
|
+
source = "./modules/region"
|
|
202
|
+
region = "us-east-1"
|
|
203
|
+
gpu_types = { b300 = {...}, t4 = {...}, ... }
|
|
204
|
+
spot_types = ["b300", "b200", "h100", ...]
|
|
205
|
+
providers = { aws = aws.us_east_1 }
|
|
206
|
+
}
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
**What goes in the module:** VPC, subnets, EKS cluster, ASGs, launch templates, Lambda functions, DDB tables, EFS, monitoring, DNS. Basically everything in the current root except provider config and shared resources.
|
|
210
|
+
|
|
211
|
+
**What stays at root:** Provider blocks with aliases, ECR replication config, AMI copy (`aws_ami_copy` from primary to secondary regions), global IAM roles if any, CLI config.
|
|
212
|
+
|
|
213
|
+
**AMI sharing:** Build the baked AMI in us-east-2 (primary), then `aws_ami_copy` it to the other regions. One build, replicated. The `ami_baker` stays in root and outputs the AMI ID; each module receives it as a variable.
|
|
214
|
+
|
|
215
|
+
**Docker sharing:** ECR replication already set up. Docker builds once in primary region, auto-replicates.
|
|
216
|
+
|
|
217
|
+
**Migration plan (since nobody uses east1 yet):**
|
|
218
|
+
1. `tofu workspace select prod-east1 && tofu destroy` — clean slate
|
|
219
|
+
2. Move all resources into `modules/region/`
|
|
220
|
+
3. Create provider aliases in root
|
|
221
|
+
4. Import prod (us-east-2) resources into new module state: `tofu import module.us_east_2.aws_vpc.gpu_dev_vpc vpc-xxx`
|
|
222
|
+
5. Add us-east-1 module — fresh create, no import needed
|
|
223
|
+
6. Delete workspace: `tofu workspace delete prod-east1`
|
|
224
|
+
|
|
225
|
+
**Risks:**
|
|
226
|
+
- Import step for prod is tedious (~50+ resources) but mechanical
|
|
227
|
+
- Lambda zip paths need to be relative to module, not root
|
|
228
|
+
- EKS auth (aws-auth ConfigMap) is per-cluster — each module manages its own
|
|
229
|
+
- CLI needs to know which region to query — already handled by config
|
|
230
|
+
|
|
231
|
+
**Estimated effort:** 1 dedicated session (~4-6 hours). Most time on the module extraction + prod import.
|
|
232
|
+
|
|
233
|
+
**Prerequisite for:** Adding us-west-1, us-west-2, or any future region (becomes one module block each).
|
|
234
|
+
|
|
186
235
|
## Recent Fixes (Oct 27, 2025)
|
|
187
236
|
|
|
188
237
|
**NVIDIA Profiling Bootstrap Configuration (Oct 27, 2025):**
|
|
@@ -232,6 +281,9 @@ kubectl get pods -n monitoring -l app.kubernetes.io/name=grafana
|
|
|
232
281
|
|
|
233
282
|
### 📋 Remaining Tasks
|
|
234
283
|
|
|
284
|
+
- **Merge multi-region into single tf state** - HIGH PRIORITY. Kill prod-east1 workspace, refactor into module-per-region in one state. See the "Multi-Region Single-State Refactor" research notes above. Enables: one `tf apply`, shared AMI (aws_ami_copy), shared Docker (ECR replication already set up), no double builds. Prerequisite for adding west regions.
|
|
285
|
+
- **Add us-west-1 and us-west-2 spot regions** - BLOCKED on single-state refactor. After refactor, adding a region = adding one module block.
|
|
286
|
+
- **Spot UX improvements** - Queue position should be tracked per GPU type, so the first request for each type shows #1 (not a single cross-type FIFO). Status should show "queued (waiting for capacity)", not just "queued". The interactive picker should show spot GPU counts from east1, not prod.
|
|
235
287
|
- **FQDN for devservers** - Set up proper domain names for development server access
|
|
236
288
|
- **Automated SSH config per reservation** - ✅ DONE - Each reservation now gets `~/.devgpu/<reservation_id>-sshconfig` file, use with `ssh -F ~/.devgpu/<reservation_id>-sshconfig <pod_name>`
|
|
237
289
|
- **Custom Docker image scaffold** - Create Dockerfile with pre-installed packages (Jupyter, etc.)
|
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
.gitignore
|
|
2
2
|
CLAUDE.md
|
|
3
|
-
PROGRESS.md
|
|
4
|
-
PR_DESCRIPTION.md
|
|
5
3
|
README.md
|
|
6
|
-
TODO.md
|
|
7
|
-
post.md
|
|
8
4
|
pyproject.toml
|
|
9
5
|
.github/workflows/no-gitlinks.yml
|
|
10
6
|
.github/workflows/publish.yml
|
|
@@ -37,8 +33,12 @@ docs/icons8-cursor-ai.svg
|
|
|
37
33
|
terraform-gpu-devservers/.terraform.lock.hcl
|
|
38
34
|
terraform-gpu-devservers/README.md
|
|
39
35
|
terraform-gpu-devservers/alb.tf
|
|
36
|
+
terraform-gpu-devservers/ami-baker.tf
|
|
40
37
|
terraform-gpu-devservers/availability.tf
|
|
41
38
|
terraform-gpu-devservers/backend.tf
|
|
39
|
+
terraform-gpu-devservers/check_b200.py
|
|
40
|
+
terraform-gpu-devservers/cluster-autoscaler.tf
|
|
41
|
+
terraform-gpu-devservers/cmd_proxy.py
|
|
42
42
|
terraform-gpu-devservers/docker-build.tf
|
|
43
43
|
terraform-gpu-devservers/ecr.tf
|
|
44
44
|
terraform-gpu-devservers/efs.tf
|
|
@@ -48,6 +48,7 @@ terraform-gpu-devservers/git-cache.tf
|
|
|
48
48
|
terraform-gpu-devservers/gpu-dev-pod-irsa.tf
|
|
49
49
|
terraform-gpu-devservers/kubernetes.tf
|
|
50
50
|
terraform-gpu-devservers/lambda.tf
|
|
51
|
+
terraform-gpu-devservers/list_b200.py
|
|
51
52
|
terraform-gpu-devservers/main.tf
|
|
52
53
|
terraform-gpu-devservers/mig-config.tf
|
|
53
54
|
terraform-gpu-devservers/mig-parted-config.yaml
|
|
@@ -60,6 +61,7 @@ terraform-gpu-devservers/route53.tf
|
|
|
60
61
|
terraform-gpu-devservers/s3-disk-contents.tf
|
|
61
62
|
terraform-gpu-devservers/ssh-proxy-service.tf
|
|
62
63
|
terraform-gpu-devservers/ssh-proxy.tf
|
|
64
|
+
terraform-gpu-devservers/subnet-0fe3a2c45570091ad
|
|
63
65
|
terraform-gpu-devservers/switch-to.sh
|
|
64
66
|
terraform-gpu-devservers/variables.tf
|
|
65
67
|
terraform-gpu-devservers/.claude/skills/deploy.md
|
|
@@ -114,6 +116,7 @@ terraform-gpu-devservers/ssh-proxy/proxy.py
|
|
|
114
116
|
terraform-gpu-devservers/ssh-proxy/requirements.txt
|
|
115
117
|
terraform-gpu-devservers/templates/al2023-cpu-user-data.sh
|
|
116
118
|
terraform-gpu-devservers/templates/al2023-user-data.sh
|
|
119
|
+
terraform-gpu-devservers/templates/ami-baker-user-data.sh
|
|
117
120
|
terraform-gpu-devservers/templates/user-data-self-managed.sh
|
|
118
121
|
terraform-gpu-devservers/templates/user-data.sh
|
|
119
122
|
tests/submit/README.md
|