gpu-dev 0.4.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/PKG-INFO +1 -1
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/PKG-INFO +1 -1
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev_cli/reservations.py +9 -1
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/pyproject.toml +1 -1
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/reservation_processor/index.py +6 -4
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda.tf +1 -1
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/.github/workflows/no-gitlinks.yml +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/.github/workflows/publish.yml +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/.gitignore +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/CLAUDE.md +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/PROGRESS.md +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/PR_DESCRIPTION.md +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/TODO.md +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/admin/README.md +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/admin/generate_stats.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/admin/requirements.txt +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/README.md +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/ZERO_CONFIG_SETUP.md +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/SOURCES.txt +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/dependency_links.txt +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/entry_points.txt +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/requires.txt +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/top_level.txt +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev_cli/__init__.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev_cli/auth.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev_cli/cli.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev_cli/config.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev_cli/disks.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev_cli/interactive.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev_cli/name_generator.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/gpu_dev_cli/ssh_proxy.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/gpu-dev-cli/minimal-iam-policy.json +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/cli-tools/scripts/clear_stale_disk_locks.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/docs/USER_GUIDE.md +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/docs/devgpu-features.html +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/docs/docker-mark-blue.svg +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/docs/icons8-cursor-ai.svg +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/post.md +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/setup.cfg +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/.claude/skills/deploy.md +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/.terraform.lock.hcl +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/README.md +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/alb.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/availability.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/backend.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/.dockerignore +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/Dockerfile +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/backup-dotfiles +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/bash_profile +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/bashrc +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/bashrc_ext +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/build-with-efa.sh +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/dotfiles-shutdown-handler +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/list-dotfile-versions +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/motd_script +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/nproc_wrapper +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/profile +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/restore-dotfiles +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/restore-dotfiles-version +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/setup-dotfiles-persistence +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/shell_env +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/ssh_config +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/zprofile +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/zshrc +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker/zshrc_ext +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker-build.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker-example/Dockerfile +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/docker-example/hello.txt +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/ecr.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/efs.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/eks.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/expiry.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/git-cache.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/kubernetes.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/availability_updater/index.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/availability_updater/requirements.txt +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/reservation_expiry/index.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/reservation_processor/buildkit_job.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/reservation_processor/requirements.txt +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/shared/__init__.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/shared/alb_utils.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/shared/dns_utils.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/shared/k8s_client.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/shared/requirements.txt +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/shared/snapshot_utils.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/main.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py.bak +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/migrations/check_snapshots.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/migrations/run_backfill.sh +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/monitoring.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/outputs.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/pyproject.toml +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/queue.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/route53.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/s3-disk-contents.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/scripts/CLEANUP_GUIDE.md +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/scripts/detect_empty_volumes.sh +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/scripts/ec2_avail_probe.sh +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/scripts/inspect_user_data.sh +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/ssh-proxy/Dockerfile +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/ssh-proxy/proxy.py +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/ssh-proxy/requirements.txt +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/ssh-proxy-service.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/ssh-proxy.tf +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/switch-to.sh +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/templates/al2023-cpu-user-data.sh +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/templates/al2023-user-data.sh +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/templates/user-data-self-managed.sh +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/templates/user-data.sh +0 -0
- {gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/variables.tf +0 -0
|
@@ -1364,10 +1364,18 @@ class ReservationManager:
|
|
|
1364
1364
|
time.sleep(2)
|
|
1365
1365
|
continue
|
|
1366
1366
|
|
|
1367
|
+
# Check for error written by Lambda
|
|
1368
|
+
add_user_error = reservation.get("add_user_error", "")
|
|
1369
|
+
if add_user_error:
|
|
1370
|
+
live.stop()
|
|
1371
|
+
console.print(
|
|
1372
|
+
f"[red]❌ {add_user_error}[/red]"
|
|
1373
|
+
)
|
|
1374
|
+
return False
|
|
1375
|
+
|
|
1367
1376
|
current_secondary_users = reservation.get(
|
|
1368
1377
|
"secondary_users", [])
|
|
1369
1378
|
|
|
1370
|
-
# Check if the user has been added
|
|
1371
1379
|
if github_username in current_secondary_users:
|
|
1372
1380
|
live.stop()
|
|
1373
1381
|
console.print(
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "gpu-dev"
|
|
7
|
-
version = "0.4.0"
|
|
7
|
+
version = "0.4.1"
|
|
8
8
|
description = "CLI tool for PyTorch GPU developer server reservations"
|
|
9
9
|
authors = [{name = "PyTorch Team"}]
|
|
10
10
|
readme = "cli-tools/gpu-dev-cli/README.md"
|
{gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/reservation_processor/index.py
RENAMED
|
@@ -8153,12 +8153,14 @@ def process_add_user_action(record: dict[str, Any]) -> bool:
|
|
|
8153
8153
|
logger.info(
|
|
8154
8154
|
f"Successfully added user {github_username} to reservation {full_reservation_id}"
|
|
8155
8155
|
)
|
|
8156
|
+
# Clear any previous error
|
|
8157
|
+
update_reservation_fields(full_reservation_id, add_user_error="")
|
|
8156
8158
|
return True
|
|
8157
8159
|
else:
|
|
8158
|
-
|
|
8159
|
-
|
|
8160
|
-
)
|
|
8161
|
-
return
|
|
8160
|
+
error_msg = f"Failed to add user {github_username} - check that the GitHub user has public SSH keys at github.com/{github_username}.keys"
|
|
8161
|
+
logger.error(error_msg)
|
|
8162
|
+
update_reservation_fields(full_reservation_id, add_user_error=error_msg)
|
|
8163
|
+
return True # Don't retry - write error for CLI to detect
|
|
8162
8164
|
|
|
8163
8165
|
except Exception as e:
|
|
8164
8166
|
logger.error(f"Error processing add user action: {str(e)}")
|
|
@@ -180,7 +180,7 @@ resource "aws_lambda_function" "reservation_processor" {
|
|
|
180
180
|
HOSTED_ZONE_ID = local.effective_domain_name != "" ? local.hosted_zone_id : ""
|
|
181
181
|
SSH_DOMAIN_MAPPINGS_TABLE = local.effective_domain_name != "" ? aws_dynamodb_table.ssh_domain_mappings.name : ""
|
|
182
182
|
SSL_CERTIFICATE_ARN = local.effective_domain_name != "" ? aws_acm_certificate.wildcard[0].arn : ""
|
|
183
|
-
LAMBDA_VERSION = "0.4.0"
|
|
183
|
+
LAMBDA_VERSION = "0.4.1"
|
|
184
184
|
MIN_CLI_VERSION = "0.4.0"
|
|
185
185
|
DISK_CONTENTS_BUCKET = aws_s3_bucket.disk_contents.bucket
|
|
186
186
|
OPERATIONS_TABLE = aws_dynamodb_table.operations.name
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/availability_updater/index.py
RENAMED
|
File without changes
|
|
File without changes
|
{gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py
RENAMED
|
File without changes
|
|
File without changes
|
{gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.4.0 → gpu_dev-0.4.1}/terraform-gpu-devservers/templates/user-data-self-managed.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|