gpu-dev 0.6.2__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/PKG-INFO +1 -1
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/PKG-INFO +1 -1
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/SOURCES.txt +4 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/cli.py +32 -8
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/pyproject.toml +1 -1
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/README.md +54 -1
- gpu_dev-0.6.3/sdk/python/examples/batch_multi_gpu.py +66 -0
- gpu_dev-0.6.3/sdk/python/examples/interactive_debug.py +54 -0
- gpu_dev-0.6.3/sdk/python/examples/run_tests.py +64 -0
- gpu_dev-0.6.3/sdk/python/examples/submit_job.py +38 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/_backend/aws.py +11 -4
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/_sync/client.py +60 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/_sync/sandbox.py +112 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/reservation_processor/index.py +46 -41
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/.github/workflows/no-gitlinks.yml +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/.github/workflows/publish.yml +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/.gitignore +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/CLAUDE.md +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/README.md +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/admin/README.md +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/admin/generate_stats.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/admin/requirements.txt +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/README.md +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/ZERO_CONFIG_SETUP.md +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/dependency_links.txt +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/entry_points.txt +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/requires.txt +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/top_level.txt +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/__init__.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/auth.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/config.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/disks.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/interactive.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/name_generator.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/reservations.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/ssh_proxy.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/minimal-iam-policy.json +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/cli-tools/scripts/clear_stale_disk_locks.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/docs/USER_GUIDE.md +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/docs/devgpu-features.html +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/docs/docker-mark-blue.svg +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/docs/icons8-cursor-ai.svg +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/examples/quickstart.ipynb +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/pyproject.toml +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/__init__.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/_async/__init__.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/_backend/__init__.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/_backend/protocol.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/_sync/__init__.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/_transport/__init__.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/_transport/ssh.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/common/__init__.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/common/config.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/common/enums.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/common/errors.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/common/models.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/src/gpu_dev/py.typed +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/tests/__init__.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/sdk/python/tests/test_models.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/setup.cfg +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/.claude/skills/deploy.md +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/.terraform.lock.hcl +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/README.md +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/alb.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/ami-baker.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/availability.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/backend.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/check_b200.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/cluster-autoscaler.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/cmd_proxy.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/.dockerignore +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/Dockerfile +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/backup-dotfiles +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/bash_profile +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/bashrc +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/bashrc_ext +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/build-with-efa.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/dotfiles-shutdown-handler +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/list-dotfile-versions +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/motd_script +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/nproc_wrapper +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/profile +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/restore-dotfiles +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/restore-dotfiles-version +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/setup-dotfiles-persistence +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/shell_env +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/ssh_config +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/zprofile +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/zshrc +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker/zshrc_ext +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker-build.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker-example/Dockerfile +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/docker-example/hello.txt +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/ecr.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/efs.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/eks.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/expiry.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/git-cache.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/gpu-dev-pod-irsa.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/kubernetes.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/availability_updater/index.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/availability_updater/requirements.txt +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/migration/tag_largest_snapshots.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/reservation_expiry/index.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/reservation_expiry/requirements.txt +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/reservation_processor/buildkit_job.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/reservation_processor/requirements.txt +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/shared/__init__.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/shared/alb_utils.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/shared/dns_utils.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/shared/k8s_client.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/shared/k8s_resource_tracker.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/shared/requirements.txt +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda/shared/snapshot_utils.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/lambda.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/list_b200.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/main.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/mig-config.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/mig-parted-config.yaml +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/migrations/backfill_snapshot_contents.py.bak +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/migrations/check_snapshots.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/migrations/migrate_disks_to_named.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/migrations/run_backfill.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/monitoring.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/node-termination-handler.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/outputs.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/pyproject.toml +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/queue.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/route53.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/s3-disk-contents.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/scripts/CLEANUP_GUIDE.md +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/scripts/detect_empty_volumes.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/scripts/ec2_avail_probe.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/scripts/inspect_user_data.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/ssh-proxy/Dockerfile +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/ssh-proxy/proxy.py +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/ssh-proxy/requirements.txt +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/ssh-proxy-service.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/ssh-proxy.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/subnet-0fe3a2c45570091ad +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/switch-to.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/templates/al2023-cpu-user-data.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/templates/al2023-user-data.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/templates/ami-baker-user-data.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/templates/user-data-self-managed.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/templates/user-data.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/terraform-gpu-devservers/variables.tf +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/tests/submit/README.md +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/tests/submit/fail/run.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/tests/submit/multinode/run.sh +0 -0
- {gpu_dev-0.6.2 → gpu_dev-0.6.3}/tests/submit/success/run.sh +0 -0
|
@@ -32,7 +32,11 @@ docs/docker-mark-blue.svg
|
|
|
32
32
|
docs/icons8-cursor-ai.svg
|
|
33
33
|
sdk/python/README.md
|
|
34
34
|
sdk/python/pyproject.toml
|
|
35
|
+
sdk/python/examples/batch_multi_gpu.py
|
|
36
|
+
sdk/python/examples/interactive_debug.py
|
|
35
37
|
sdk/python/examples/quickstart.ipynb
|
|
38
|
+
sdk/python/examples/run_tests.py
|
|
39
|
+
sdk/python/examples/submit_job.py
|
|
36
40
|
sdk/python/src/gpu_dev/__init__.py
|
|
37
41
|
sdk/python/src/gpu_dev/py.typed
|
|
38
42
|
sdk/python/src/gpu_dev/_async/__init__.py
|
|
@@ -1192,8 +1192,10 @@ def reserve(
|
|
|
1192
1192
|
# Build choices
|
|
1193
1193
|
choices = []
|
|
1194
1194
|
|
|
1195
|
-
#
|
|
1196
|
-
|
|
1195
|
+
# Show all non-deleted disks, marking in-use ones as disabled
|
|
1196
|
+
all_disks = [d for d in existing_disks if not d.get('is_deleted', False)]
|
|
1197
|
+
available_disks = [d for d in all_disks if not d['in_use']]
|
|
1198
|
+
in_use_disks = [d for d in all_disks if d['in_use']]
|
|
1197
1199
|
|
|
1198
1200
|
if available_disks:
|
|
1199
1201
|
choices.append(questionary.Separator("=== Available Disks ==="))
|
|
@@ -1204,6 +1206,17 @@ def reserve(
|
|
|
1204
1206
|
value=("select", d['name'])
|
|
1205
1207
|
))
|
|
1206
1208
|
|
|
1209
|
+
if in_use_disks:
|
|
1210
|
+
choices.append(questionary.Separator("=== In Use ==="))
|
|
1211
|
+
for d in in_use_disks:
|
|
1212
|
+
res_id = d.get('reservation_id', '?')[:8]
|
|
1213
|
+
display = f"{d['name']} ({d['size_gb']}GB) — in use by {res_id}"
|
|
1214
|
+
choices.append(questionary.Choice(
|
|
1215
|
+
title=display,
|
|
1216
|
+
value=("in_use", d['name']),
|
|
1217
|
+
disabled="currently in use",
|
|
1218
|
+
))
|
|
1219
|
+
|
|
1207
1220
|
choices.append(questionary.Separator("=== Options ==="))
|
|
1208
1221
|
choices.append(questionary.Choice(
|
|
1209
1222
|
title="Create a new disk",
|
|
@@ -3307,12 +3320,21 @@ def connect(ctx: click.Context, reservation_id: Optional[str]) -> None:
|
|
|
3307
3320
|
# Fast path: if reservation ID given, check local SSH config first (no network)
|
|
3308
3321
|
if reservation_id:
|
|
3309
3322
|
ssh_config_dir = Path.home() / ".gpu-dev"
|
|
3310
|
-
|
|
3311
|
-
if
|
|
3312
|
-
|
|
3313
|
-
|
|
3314
|
-
|
|
3315
|
-
|
|
3323
|
+
config_file = ssh_config_dir / f"{reservation_id[:8]}-sshconfig"
|
|
3324
|
+
if config_file.exists():
|
|
3325
|
+
config_text = config_file.read_text()
|
|
3326
|
+
fqdn_line = [l.strip() for l in config_text.splitlines() if l.strip().startswith("HostName")]
|
|
3327
|
+
if fqdn_line:
|
|
3328
|
+
fqdn = fqdn_line[0].split(None, 1)[1]
|
|
3329
|
+
pod_name = f"gpu-dev-{reservation_id[:8]}"
|
|
3330
|
+
rprint(f"[cyan]Connecting to {pod_name}...[/cyan]\n")
|
|
3331
|
+
import subprocess, sys
|
|
3332
|
+
sys.exit(subprocess.call([
|
|
3333
|
+
"ssh", "-o", "StrictHostKeyChecking=no", "-o", "UserKnownHostsFile=/dev/null",
|
|
3334
|
+
"-o", "ProxyCommand=gpu-dev-ssh-proxy %h %p",
|
|
3335
|
+
"-o", "ForwardAgent=yes",
|
|
3336
|
+
f"dev@{fqdn}",
|
|
3337
|
+
]))
|
|
3316
3338
|
|
|
3317
3339
|
with Live(
|
|
3318
3340
|
Spinner("dots", text="📡 Fetching reservation details..."), console=console
|
|
@@ -3543,7 +3565,9 @@ def connect(ctx: click.Context, reservation_id: Optional[str]) -> None:
|
|
|
3543
3565
|
except KeyboardInterrupt:
|
|
3544
3566
|
rprint("\n[yellow]Connection cancelled by user[/yellow]")
|
|
3545
3567
|
except Exception as e:
|
|
3568
|
+
import traceback
|
|
3546
3569
|
rprint(f"[red]❌ Error: {str(e)}[/red]")
|
|
3570
|
+
traceback.print_exc()
|
|
3547
3571
|
|
|
3548
3572
|
|
|
3549
3573
|
@main.command(name="get-ssh-config")
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "gpu-dev"
|
|
7
|
-
version = "0.6.
|
|
7
|
+
version = "0.6.3"
|
|
8
8
|
description = "CLI tool for PyTorch GPU developer server reservations"
|
|
9
9
|
authors = [{name = "PyTorch Team"}]
|
|
10
10
|
readme = "cli-tools/gpu-dev-cli/README.md"
|
|
@@ -44,6 +44,23 @@ with client.reserve(gpu_type="t4") as sb:
|
|
|
44
44
|
# reservation cancelled automatically
|
|
45
45
|
```
|
|
46
46
|
|
|
47
|
+
## Progress Tracking
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
# Built-in progress logging
|
|
51
|
+
sandbox = client.reserve(gpu_type="h100", on_progress=True)
|
|
52
|
+
# [ 1.5s] pending
|
|
53
|
+
# [ 3.2s] preparing
|
|
54
|
+
# [ 8.1s] 🚀 Container running
|
|
55
|
+
# [ 22.4s] Ready
|
|
56
|
+
|
|
57
|
+
# Custom callback
|
|
58
|
+
sandbox = client.reserve(
|
|
59
|
+
gpu_type="h100",
|
|
60
|
+
on_progress=lambda msg, t: print(f"⏳ [{t:.0f}s] {msg}")
|
|
61
|
+
)
|
|
62
|
+
```
|
|
63
|
+
|
|
47
64
|
## Available GPU Types
|
|
48
65
|
|
|
49
66
|
| Type | GPUs/node | Architecture |
|
|
@@ -75,6 +92,7 @@ client = GpuDev(GpuDevConfig(github_user="octocat")) # Explicit config
|
|
|
75
92
|
| `list(status=[...])` | List reservations as `Sandbox` objects |
|
|
76
93
|
| `availability()` | GPU availability by type |
|
|
77
94
|
| `disks()` | List persistent disks |
|
|
95
|
+
| `search_logs(reservation_id)` | Get processing logs for any reservation |
|
|
78
96
|
|
|
79
97
|
### `Sandbox` — Reserved Environment
|
|
80
98
|
|
|
@@ -82,6 +100,8 @@ client = GpuDev(GpuDevConfig(github_user="octocat")) # Explicit config
|
|
|
82
100
|
sandbox = client.reserve(gpu_type="h100")
|
|
83
101
|
```
|
|
84
102
|
|
|
103
|
+
**Methods:**
|
|
104
|
+
|
|
85
105
|
| Method | Description |
|
|
86
106
|
|--------|-------------|
|
|
87
107
|
| `exec(command, timeout=None)` | Run shell command, returns `ExecResult` |
|
|
@@ -91,7 +111,11 @@ sandbox = client.reserve(gpu_type="h100")
|
|
|
91
111
|
| `extend(hours)` | Extend duration |
|
|
92
112
|
| `refresh()` | Refresh status from server |
|
|
93
113
|
| `add_user(github_username)` | Grant SSH access to another user |
|
|
94
|
-
| `wait_until_ready(
|
|
114
|
+
| `wait_until_ready(timeout, on_progress)` | Block until active |
|
|
115
|
+
| `logs()` | Get reservation processing log |
|
|
116
|
+
| `pod_logs(lines=50)` | Get container stdout via SSH |
|
|
117
|
+
|
|
118
|
+
**Properties:**
|
|
95
119
|
|
|
96
120
|
| Property | Description |
|
|
97
121
|
|----------|-------------|
|
|
@@ -101,8 +125,15 @@ sandbox = client.reserve(gpu_type="h100")
|
|
|
101
125
|
| `gpu_count` | Number of GPUs |
|
|
102
126
|
| `ssh_command` | SSH command string |
|
|
103
127
|
| `pod_name` | SSH hostname |
|
|
128
|
+
| `fqdn` | Fully-qualified domain name |
|
|
104
129
|
| `is_active` | Whether ready for commands |
|
|
105
130
|
| `expires_at` | Expiration time |
|
|
131
|
+
| `disk_name` | Attached persistent disk |
|
|
132
|
+
| `instance_type` | EC2 instance type |
|
|
133
|
+
| `created_at` | Creation timestamp |
|
|
134
|
+
| `node_ip` | Node public IP |
|
|
135
|
+
| `detailed_status` | Detailed status message |
|
|
136
|
+
| `user_id` | Owner's user ID |
|
|
106
137
|
|
|
107
138
|
### `ExecResult`
|
|
108
139
|
|
|
@@ -113,6 +144,21 @@ result.stdout # "hello\n"
|
|
|
113
144
|
result.stderr # ""
|
|
114
145
|
```
|
|
115
146
|
|
|
147
|
+
## Logs & Debugging
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
# Reservation processing log (what happened during setup)
|
|
151
|
+
for entry in sandbox.logs():
|
|
152
|
+
print(f"[{entry['timestamp'][11:23]}] {entry['message']}")
|
|
153
|
+
|
|
154
|
+
# Look up logs for any reservation by ID prefix
|
|
155
|
+
for entry in client.search_logs("abc12345"):
|
|
156
|
+
print(f"[{entry['timestamp'][11:23]}] {entry['message']}")
|
|
157
|
+
|
|
158
|
+
# Container stdout (via SSH)
|
|
159
|
+
print(sandbox.pod_logs(lines=20))
|
|
160
|
+
```
|
|
161
|
+
|
|
116
162
|
## Spot Instances
|
|
117
163
|
|
|
118
164
|
Use spot instances for lower cost (may be preempted):
|
|
@@ -129,6 +175,7 @@ Data persists across reservations when using named disks:
|
|
|
129
175
|
# First session
|
|
130
176
|
sb = client.reserve(gpu_type="h100", disk_name="my-project")
|
|
131
177
|
sb.exec("pip install torch && echo done")
|
|
178
|
+
sb.cancel()
|
|
132
179
|
|
|
133
180
|
# Later session — packages still installed
|
|
134
181
|
sb = client.reserve(gpu_type="h100", disk_name="my-project")
|
|
@@ -185,3 +232,9 @@ except GpuDevValidationError as e:
|
|
|
185
232
|
except GpuDevTimeoutError:
|
|
186
233
|
print("Reservation timed out — GPUs may be busy")
|
|
187
234
|
```
|
|
235
|
+
|
|
236
|
+
Credentials are cached to disk (45-min TTL) and auto-refreshed on expiry — no manual re-auth needed in long-running notebooks.
|
|
237
|
+
|
|
238
|
+
## Interactive Notebook
|
|
239
|
+
|
|
240
|
+
See [examples/quickstart.ipynb](examples/quickstart.ipynb) for a hands-on walkthrough.
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Run the same job across multiple GPU types and compare results.
|
|
2
|
+
|
|
3
|
+
Useful for benchmarking or testing compatibility across hardware.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python batch_multi_gpu.py
|
|
7
|
+
"""
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
9
|
+
|
|
10
|
+
from gpu_dev import GpuDev, GpuDevError
|
|
11
|
+
|
|
12
|
+
client = GpuDev()
|
|
13
|
+
|
|
14
|
+
BENCHMARK_CMD = """
|
|
15
|
+
python3 -c '
|
|
16
|
+
import torch, time
|
|
17
|
+
gpu = torch.cuda.get_device_name(0)
|
|
18
|
+
x = torch.randn(4096, 4096, device="cuda")
|
|
19
|
+
torch.cuda.synchronize()
|
|
20
|
+
t0 = time.time()
|
|
21
|
+
for _ in range(100):
|
|
22
|
+
y = x @ x
|
|
23
|
+
torch.cuda.synchronize()
|
|
24
|
+
ms = (time.time() - t0) * 1000
|
|
25
|
+
print(f"{gpu}|{ms:.0f}")
|
|
26
|
+
'
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
GPU_TYPES = ["t4", "l4", "rtxpro6000"]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def run_benchmark(gpu_type: str) -> dict:
    """Reserve one GPU of ``gpu_type``, run the matmul benchmark, and report.

    Args:
        gpu_type: GPU type identifier passed straight to ``client.reserve``.

    Returns:
        A dict that always contains ``gpu_type`` and ``ok``. When ``ok`` is
        true it also contains ``gpu_name`` and ``ms``; otherwise it contains
        ``error`` with the SDK error text or the command's stderr/stdout.
    """
    try:
        sb = client.reserve(
            gpu_type=gpu_type,
            gpu_count=1,
            hours=0.25,
            name=f"bench-{gpu_type}",
        )
        try:
            result = sb.exec(BENCHMARK_CMD.strip(), timeout=30)
        finally:
            # Release the reservation even when exec raises, so a failed run
            # does not keep holding the GPU until the reservation expires.
            sb.cancel()
    except GpuDevError as e:
        return {"gpu_type": gpu_type, "error": str(e), "ok": False}

    if result.exit_code == 0:
        # The benchmark prints "<gpu name>|<ms>" as its last line; parse only
        # that line so earlier output cannot break the unpacking.
        lines = result.stdout.strip().splitlines()
        gpu_name, sep, ms = (lines[-1] if lines else "").rpartition("|")
        if sep:
            try:
                elapsed = float(ms)
            except ValueError:
                # Malformed timing: fall through and report it as a failure
                # instead of letting the exception escape the worker thread.
                pass
            else:
                return {"gpu_type": gpu_type, "gpu_name": gpu_name, "ms": elapsed, "ok": True}
    return {"gpu_type": gpu_type, "error": result.stderr or result.stdout, "ok": False}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Announce the run; the count comes straight from GPU_TYPES.
print(f"Benchmarking matmul 4096x4096 x100 across {len(GPU_TYPES)} GPU types...\n")

# One worker per GPU type so every reservation is requested concurrently.
with ThreadPoolExecutor(max_workers=len(GPU_TYPES)) as pool:
    pending = {pool.submit(run_benchmark, kind): kind for kind in GPU_TYPES}

    print(f"{'GPU Type':15s} {'GPU Name':30s} {'Time':>8s}")
    print("-" * 55)
    # Report each result as soon as its benchmark finishes.
    for done in as_completed(pending):
        outcome = done.result()
        if not outcome["ok"]:
            print(f"{outcome['gpu_type']:15s} FAILED: {outcome['error'][:40]}")
            continue
        print(f"{outcome['gpu_type']:15s} {outcome['gpu_name']:30s} {outcome['ms']:>7.0f}ms")

print("\nDone")
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Interactive debugging: reserve a GPU, poke around, inspect logs.
|
|
2
|
+
|
|
3
|
+
Use this in a Python REPL or Jupyter notebook for ad-hoc debugging.
|
|
4
|
+
|
|
5
|
+
from gpu_dev import GpuDev
|
|
6
|
+
client = GpuDev()
|
|
7
|
+
exec(open("examples/interactive_debug.py").read())
|
|
8
|
+
"""
|
|
9
|
+
from gpu_dev import GpuDev

# Top-level names are kept as-is: the script is meant to be exec'd into a
# REPL or notebook, where they remain available afterwards.
client = GpuDev()

# Availability summary, skipping GPU types with no capacity at all
print("GPU Availability:")
for gpu, info in sorted(client.availability().items()):
    if info.total <= 0:
        continue
    print(f" {gpu:15s} {info.available:>3d}/{info.total} free")

# Every reservation returned by the default listing
print("\nActive reservations:")
for sb in client.list():
    print(f" {sb.id[:8]} {sb.gpu_count}x {sb.gpu_type:10s} {sb.status.value:10s} disk={sb.disk_name or '-'}")

# Persistent disks and whether each one is attached right now
print("\nDisks:")
for d in client.disks():
    if d.in_use:
        status = "IN USE"
    else:
        status = "free"
    print(f" {d.name:20s} {d.snapshot_count:>3d} snapshots {status}")

# Pick up the first reservation reported as active, if there is one
active = client.list(status=["active"])
if not active:
    print("\nNo active reservations")
else:
    sb = active[0]
    print(f"\nReconnected to {sb.id[:8]} ({sb.gpu_count}x {sb.gpu_type})")
    print(f" SSH: ssh {sb.pod_name}")
    print(f" Disk: {sb.disk_name}")
    print(f" Expires: {sb.expires_at}")

    # Health check: list the GPUs visible inside the container
    result = sb.exec("nvidia-smi -L 2>&1 | head -4", timeout=5)
    if result.exit_code != 0:
        print(f" GPU check failed (exit {result.exit_code})")
    else:
        print(f" GPU: {result.stdout.strip()}")

    # Reservation processing log, trimmed to HH:MM:SS and 70 characters
    print("\n Setup log:")
    for entry in sb.logs():
        print(f" [{entry['timestamp'][11:19]}] {entry['message'][:70]}")

# Look up a past reservation's logs
# client.search_logs("abc12345")
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Run tests on a GPU server with a persistent disk snapshot.
|
|
2
|
+
|
|
3
|
+
Loads a pre-configured environment from a named disk and runs
|
|
4
|
+
a test suite — useful for CI or interactive debugging.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python run_tests.py
|
|
8
|
+
python run_tests.py --branch feature/my-fix
|
|
9
|
+
"""
|
|
10
|
+
import shlex
import sys

from gpu_dev import GpuDev, GpuDevTimeoutError

REPO_DIR = "/home/dev/pytorch"


def _parse_branch(argv):
    """Return the branch named on the command line, defaulting to "main".

    Accepts ``run_tests.py BRANCH``, ``run_tests.py --branch BRANCH`` and
    ``run_tests.py --branch=BRANCH`` so the documented usage works.
    """
    args = argv[1:]
    if not args:
        return "main"
    if args[0] == "--branch":
        if len(args) < 2:
            sys.exit("usage: run_tests.py [--branch] BRANCH")
        return args[1]
    if args[0].startswith("--branch="):
        return args[0].partition("=")[2] or "main"
    return args[0]


branch = _parse_branch(sys.argv)
client = GpuDev()

print(f"Reserving H100 with 'pytorch-dev' disk (branch: {branch})...")

try:
    sb = client.reserve(
        gpu_type="h100",
        gpu_count=1,
        hours=2,
        disk_name="pytorch-dev",  # pre-compiled PyTorch environment
        name=f"test-{branch[:20]}",
        on_progress=True,
    )
except GpuDevTimeoutError:
    print("No GPU capacity available — try again later or use spot")
    sys.exit(1)

# Everything after the reservation runs under try/finally so the GPU is
# released on every exit path, including exceptions and sys.exit().
try:
    print(f"\nRunning on {sb.pod_name} ({sb.instance_type})")

    # Pull latest code. The branch comes from the command line, so quote it
    # before it is interpolated into the remote shell command.
    quoted_branch = shlex.quote(branch)
    result = sb.exec(
        f"cd {REPO_DIR} && "
        "git fetch origin && "
        f"git checkout {quoted_branch} && "
        f"git pull origin {quoted_branch}",
        timeout=120,
    )
    print(result.stdout[-200:] if result.stdout else "(no output)")

    if result.exit_code != 0:
        print(f"Git checkout failed: {result.stderr}")
        sys.exit(1)

    # Run tests. Without pipefail the pipeline's status is that of `tail`,
    # which would make every run report success.
    print(f"\nRunning tests on {branch}...")
    test_pipeline = f"cd {REPO_DIR} && python test/run_test.py test_torch 2>&1 | tail -30"
    result = sb.exec(
        "bash -o pipefail -c " + shlex.quote(test_pipeline),
        timeout=1800,
    )
    print(result.stdout)

    # Show timing from reservation logs
    print("\nReservation timeline:")
    for entry in sb.logs():
        print(f" [{entry['timestamp'][11:23]}] {entry['message'][:80]}")

    exit_code = result.exit_code
finally:
    sb.cancel()

print(f"\nTests {'PASSED' if exit_code == 0 else 'FAILED'} (exit {exit_code})")
sys.exit(exit_code)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Submit a training job to a GPU server and wait for results.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
python submit_job.py
|
|
5
|
+
"""
|
|
6
|
+
from gpu_dev import GpuDev

client = GpuDev()

# The context manager cancels the reservation when the block exits.
with client.reserve(gpu_type="t4", hours=1, name="training-job", on_progress=True) as sandbox:
    print(f"\nReserved: {sandbox.id[:8]} on {sandbox.instance_type}")
    print(f"SSH: {sandbox.ssh_command}\n")

    # Ship the training script to the server
    sandbox.upload("./train.py", "/home/dev/train.py")

    # Run it, folding stderr into stdout so the output stays in order
    print("Starting training...")
    outcome = sandbox.exec("cd /home/dev && python train.py 2>&1", timeout=600)
    print(outcome.stdout)

    failed = outcome.exit_code != 0
    if failed:
        print(f"Training failed (exit {outcome.exit_code})")
        print(outcome.stderr)

        # Surface the reservation's own log entries to help diagnose the failure
        print("\nReservation logs:")
        for record in sandbox.logs("error"):
            print(f" [{record['timestamp'][11:23]}] {record['message']}")
    else:
        # Fetch whatever the job wrote to its output directory
        sandbox.download("/home/dev/output/", "./results/")
        print("Results downloaded to ./results/")

# Reservation auto-cancelled
print("Done — reservation cleaned up")
|
|
@@ -30,16 +30,19 @@ _PREFIX = "pytorch-gpu-dev"
|
|
|
30
30
|
_CRED_CACHE_PATH = Path.home() / ".config" / "gpu-dev" / "aws-cred-cache.json"
|
|
31
31
|
_CRED_CACHE_TTL = 2700 # 45 min (SSO session tokens typically last 1h)
|
|
32
32
|
|
|
33
|
-
# Module-level session cache
|
|
33
|
+
# Module-level session cache with expiry tracking
|
|
34
34
|
_cached_session: boto3.Session | None = None
|
|
35
|
+
_cached_session_expires: float = 0
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
def _get_session() -> boto3.Session:
|
|
38
39
|
"""Get a boto3 session with disk-cached credentials (saves ~900ms SSO resolution)."""
|
|
39
|
-
global _cached_session
|
|
40
|
-
if _cached_session is not None:
|
|
40
|
+
global _cached_session, _cached_session_expires
|
|
41
|
+
if _cached_session is not None and time.time() < _cached_session_expires:
|
|
41
42
|
return _cached_session
|
|
42
43
|
|
|
44
|
+
_cached_session = None
|
|
45
|
+
|
|
43
46
|
# Try disk-cached credentials
|
|
44
47
|
try:
|
|
45
48
|
if _CRED_CACHE_PATH.exists():
|
|
@@ -50,6 +53,7 @@ def _get_session() -> boto3.Session:
|
|
|
50
53
|
aws_secret_access_key=cached["secret_key"],
|
|
51
54
|
aws_session_token=cached["token"],
|
|
52
55
|
)
|
|
56
|
+
_cached_session_expires = cached["expires"]
|
|
53
57
|
return _cached_session
|
|
54
58
|
except Exception:
|
|
55
59
|
pass
|
|
@@ -80,6 +84,7 @@ def _get_session() -> boto3.Session:
|
|
|
80
84
|
pass
|
|
81
85
|
|
|
82
86
|
_cached_session = session
|
|
87
|
+
_cached_session_expires = time.time() + _CRED_CACHE_TTL
|
|
83
88
|
return session
|
|
84
89
|
|
|
85
90
|
|
|
@@ -100,11 +105,13 @@ class AwsBackend:
|
|
|
100
105
|
self._reservations = self._ddb.Table(f"{_PREFIX}-reservations")
|
|
101
106
|
self._availability = self._ddb.Table(f"{_PREFIX}-gpu-availability")
|
|
102
107
|
self._disks = self._ddb.Table(f"{_PREFIX}-disks")
|
|
108
|
+
self._queue_url: str | None = None
|
|
103
109
|
|
|
104
110
|
def _refresh_on_expired(self) -> None:
|
|
105
111
|
"""Clear cached session and reinitialize clients."""
|
|
106
|
-
global _cached_session
|
|
112
|
+
global _cached_session, _cached_session_expires
|
|
107
113
|
_cached_session = None
|
|
114
|
+
_cached_session_expires = 0
|
|
108
115
|
try:
|
|
109
116
|
_CRED_CACHE_PATH.unlink(missing_ok=True)
|
|
110
117
|
except Exception:
|
|
@@ -243,3 +243,63 @@ class GpuDev:
|
|
|
243
243
|
"""
|
|
244
244
|
user_info = self._auth()
|
|
245
245
|
return self._backend.list_disks(user_info["user_id"])
|
|
246
|
+
|
|
247
|
+
def search_logs(
    self,
    reservation_id: str,
) -> list[dict[str, str]]:
    """Return the status history recorded for a reservation.

    IDs of 32 or more characters are fetched directly by key. Shorter
    values are treated as a prefix and matched against the authenticated
    user's reservations through the ``UserIndex`` index; the first match
    found wins.

    Args:
        reservation_id: Full reservation ID, or a prefix of one.

    Returns:
        List of ``{"timestamp": "...", "message": "..."}`` dicts. The list
        is empty when nothing matches or when the lookup fails.

    Example::

        for entry in client.search_logs("abc12345"):
            print(f"[{entry['timestamp']}] {entry['message']}")
    """
    from .._backend.aws import _get_session, _PREFIX

    region = getattr(self._backend, "_region", "us-east-2")
    dynamo = _get_session().resource("dynamodb", region_name=region)
    reservations = dynamo.Table(f"{_PREFIX}-reservations")

    try:
        caller = self._auth()
        record = None

        if len(reservation_id) >= 32:
            # Long enough to be a complete ID: fetch it by primary key.
            record = reservations.get_item(Key={"reservation_id": reservation_id}).get("Item")
        else:
            params = {
                "IndexName": "UserIndex",
                "KeyConditionExpression": "user_id = :uid",
                "FilterExpression": "begins_with(reservation_id, :rid)",
                "ExpressionAttributeValues": {
                    ":uid": caller["user_id"],
                    ":rid": reservation_id,
                },
            }
            # The filter is applied per page, so a page can come back empty
            # while later pages still hold a match; keep paging until one
            # is found or the index is exhausted.
            page = reservations.query(**params)
            while True:
                matches = page.get("Items")
                if matches:
                    record = matches[0]
                    break
                if "LastEvaluatedKey" not in page:
                    break
                page = reservations.query(**params, ExclusiveStartKey=page["LastEvaluatedKey"])

        if not record:
            return []

        return [
            {"timestamp": str(step.get("timestamp", "")), "message": str(step.get("message", ""))}
            for step in record.get("status_history", [])
        ]
    except Exception:
        # Best-effort lookup: any failure is reported as "no logs".
        return []
|