gpu-dev 0.5.5__tar.gz → 0.5.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/PKG-INFO +1 -1
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/PKG-INFO +1 -1
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/pyproject.toml +1 -1
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/availability_updater/index.py +8 -3
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda.tf +1 -1
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/.github/workflows/no-gitlinks.yml +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/.github/workflows/publish.yml +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/.gitignore +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/CLAUDE.md +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/PROGRESS.md +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/PR_DESCRIPTION.md +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/TODO.md +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/admin/README.md +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/admin/generate_stats.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/admin/requirements.txt +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/README.md +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/ZERO_CONFIG_SETUP.md +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/SOURCES.txt +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/dependency_links.txt +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/entry_points.txt +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/requires.txt +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/top_level.txt +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/__init__.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/auth.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/cli.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/config.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/disks.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/interactive.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/name_generator.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/reservations.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/gpu_dev_cli/ssh_proxy.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/gpu-dev-cli/minimal-iam-policy.json +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/cli-tools/scripts/clear_stale_disk_locks.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/docs/USER_GUIDE.md +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/docs/devgpu-features.html +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/docs/docker-mark-blue.svg +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/docs/icons8-cursor-ai.svg +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/post.md +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/setup.cfg +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/.claude/skills/deploy.md +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/.terraform.lock.hcl +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/README.md +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/alb.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/availability.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/backend.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/.dockerignore +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/Dockerfile +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/backup-dotfiles +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/bash_profile +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/bashrc +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/bashrc_ext +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/build-with-efa.sh +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/dotfiles-shutdown-handler +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/list-dotfile-versions +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/motd_script +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/nproc_wrapper +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/profile +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/restore-dotfiles +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/restore-dotfiles-version +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/setup-dotfiles-persistence +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/shell_env +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/ssh_config +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/zprofile +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/zshrc +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker/zshrc_ext +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker-build.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker-example/Dockerfile +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/docker-example/hello.txt +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/ecr.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/efs.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/eks.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/expiry.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/git-cache.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/kubernetes.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/availability_updater/requirements.txt +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/reservation_expiry/index.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/reservation_processor/buildkit_job.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/reservation_processor/index.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/reservation_processor/requirements.txt +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/__init__.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/alb_utils.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/dns_utils.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/k8s_client.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/requirements.txt +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/snapshot_utils.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/main.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py.bak +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/migrations/check_snapshots.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/migrations/run_backfill.sh +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/monitoring.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/outputs.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/pyproject.toml +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/queue.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/route53.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/s3-disk-contents.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/scripts/CLEANUP_GUIDE.md +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/scripts/detect_empty_volumes.sh +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/scripts/ec2_avail_probe.sh +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/scripts/inspect_user_data.sh +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/ssh-proxy/Dockerfile +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/ssh-proxy/proxy.py +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/ssh-proxy/requirements.txt +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/ssh-proxy-service.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/ssh-proxy.tf +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/switch-to.sh +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/templates/al2023-cpu-user-data.sh +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/templates/al2023-user-data.sh +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/templates/user-data-self-managed.sh +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/templates/user-data.sh +0 -0
- {gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/variables.tf +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "gpu-dev"
|
|
7
|
-
version = "0.5.5"
|
|
7
|
+
version = "0.5.6"
|
|
8
8
|
description = "CLI tool for PyTorch GPU developer server reservations"
|
|
9
9
|
authors = [{name = "PyTorch Team"}]
|
|
10
10
|
readme = "cli-tools/gpu-dev-cli/README.md"
|
{gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/availability_updater/index.py
RENAMED
|
@@ -515,8 +515,14 @@ def compute_size_etas(v1, gpu_type, node_label_value, resource_name, gpus_per_in
|
|
|
515
515
|
pass
|
|
516
516
|
if gpus > 0:
|
|
517
517
|
pod_to_info[pod.metadata.name] = (pod.spec.node_name, gpus)
|
|
518
|
-
|
|
519
|
-
|
|
518
|
+
# used_now is the k8s ground-truth — count every running/pending pod, not just those
|
|
519
|
+
# we can match to a reservation row. Otherwise pods without DDB rows look like free GPUs.
|
|
520
|
+
node_state[pod.spec.node_name]["used_now"] += gpus
|
|
521
|
+
|
|
522
|
+
# 3) Cross-reference active reservations to attach expiry timestamps to each known pod.
|
|
523
|
+
# Pods without a matching reservation row keep their GPUs marked as used_now but have no
|
|
524
|
+
# expiration → they're treated as "never expiring" by the simulation, which is the safe
|
|
525
|
+
# fallback (we don't fabricate ETAs for usage we can't trace).
|
|
520
526
|
target_gpu_type_lower = gpu_type.lower()
|
|
521
527
|
for r in active_reservations:
|
|
522
528
|
# Reservations table stores gpu_type uppercased ("H100"); compare case-insensitively.
|
|
@@ -534,7 +540,6 @@ def compute_size_etas(v1, gpu_type, node_label_value, resource_name, gpus_per_in
|
|
|
534
540
|
except (ValueError, TypeError):
|
|
535
541
|
continue
|
|
536
542
|
node_name, gpus = pod_to_info[pod_name]
|
|
537
|
-
node_state[node_name]["used_now"] += gpus
|
|
538
543
|
node_state[node_name]["expirations"].append((ts, gpus))
|
|
539
544
|
|
|
540
545
|
# Sort each node's expirations by time.
|
|
@@ -180,7 +180,7 @@ resource "aws_lambda_function" "reservation_processor" {
|
|
|
180
180
|
HOSTED_ZONE_ID = local.effective_domain_name != "" ? local.hosted_zone_id : ""
|
|
181
181
|
SSH_DOMAIN_MAPPINGS_TABLE = local.effective_domain_name != "" ? aws_dynamodb_table.ssh_domain_mappings.name : ""
|
|
182
182
|
SSL_CERTIFICATE_ARN = local.effective_domain_name != "" ? aws_acm_certificate.wildcard[0].arn : ""
|
|
183
|
-
LAMBDA_VERSION = "0.5.5"
|
|
183
|
+
LAMBDA_VERSION = "0.5.6"
|
|
184
184
|
MIN_CLI_VERSION = "0.5.5"
|
|
185
185
|
DISK_CONTENTS_BUCKET = aws_s3_bucket.disk_contents.bucket
|
|
186
186
|
OPERATIONS_TABLE = aws_dynamodb_table.operations.name
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py
RENAMED
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt
RENAMED
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/reservation_processor/index.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpu_dev-0.5.5 → gpu_dev-0.5.6}/terraform-gpu-devservers/templates/user-data-self-managed.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|