PyPI - gpu-dev - Versions diffs - 0.6.0__tar.gz → 0.6.3__tar.gz - Mend

gpu-dev 0.6.0__tar.gz → 0.6.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (152) hide show

{gpu_dev-0.6.0 → gpu_dev-0.6.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gpu-dev
-Version: 0.6.0
+Version: 0.6.3
 Summary: CLI tool for PyTorch GPU developer server reservations
 Author: PyTorch Team
 Requires-Python: >=3.10
@@ -16,9 +16,29 @@ Requires-Dist: websockets>=12.0
 Requires-Dist: certifi>=2023.7.22
 Requires-Dist: mcp>=1.0.0
-# GPU Developer CLI
+# GPU Developer CLI & SDK
-A command-line tool for reserving and managing GPU development servers on AWS EKS.
+A command-line tool and Python SDK for reserving and managing GPU development servers.
+## Python SDK
+For programmatic access, use the [Python SDK](../../sdk/python/README.md):
+```python
+from gpu_dev import GpuDev
+client = GpuDev()
+sandbox = client.reserve(gpu_type="h100", gpu_count=2, hours=4)
+result = sandbox.exec("nvidia-smi")
+print(result.stdout)
+sandbox.cancel()
+```
+Install: `pip install -e sdk/python/` — see [SDK docs](../../sdk/python/README.md) and [quickstart notebook](../../sdk/python/examples/quickstart.ipynb).
+---
+## CLI
 ## Table of Contents

{gpu_dev-0.6.0 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/README.md RENAMED Viewed

@@ -1,6 +1,26 @@
-# GPU Developer CLI
+# GPU Developer CLI & SDK
-A command-line tool for reserving and managing GPU development servers on AWS EKS.
+A command-line tool and Python SDK for reserving and managing GPU development servers.
+## Python SDK
+For programmatic access, use the [Python SDK](../../sdk/python/README.md):
+```python
+from gpu_dev import GpuDev
+client = GpuDev()
+sandbox = client.reserve(gpu_type="h100", gpu_count=2, hours=4)
+result = sandbox.exec("nvidia-smi")
+print(result.stdout)
+sandbox.cancel()
+```
+Install: `pip install -e sdk/python/` — see [SDK docs](../../sdk/python/README.md) and [quickstart notebook](../../sdk/python/examples/quickstart.ipynb).
+---
+## CLI
 ## Table of Contents

{gpu_dev-0.6.0 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gpu-dev
-Version: 0.6.0
+Version: 0.6.3
 Summary: CLI tool for PyTorch GPU developer server reservations
 Author: PyTorch Team
 Requires-Python: >=3.10
@@ -16,9 +16,29 @@ Requires-Dist: websockets>=12.0
 Requires-Dist: certifi>=2023.7.22
 Requires-Dist: mcp>=1.0.0
-# GPU Developer CLI
+# GPU Developer CLI & SDK
-A command-line tool for reserving and managing GPU development servers on AWS EKS.
+A command-line tool and Python SDK for reserving and managing GPU development servers.
+## Python SDK
+For programmatic access, use the [Python SDK](../../sdk/python/README.md):
+```python
+from gpu_dev import GpuDev
+client = GpuDev()
+sandbox = client.reserve(gpu_type="h100", gpu_count=2, hours=4)
+result = sandbox.exec("nvidia-smi")
+print(result.stdout)
+sandbox.cancel()
+```
+Install: `pip install -e sdk/python/` — see [SDK docs](../../sdk/python/README.md) and [quickstart notebook](../../sdk/python/examples/quickstart.ipynb).
+---
+## CLI
 ## Table of Contents

{gpu_dev-0.6.0 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev.egg-info/SOURCES.txt RENAMED Viewed

@@ -30,6 +30,31 @@ docs/USER_GUIDE.md
 docs/devgpu-features.html
 docs/docker-mark-blue.svg
 docs/icons8-cursor-ai.svg
+sdk/python/README.md
+sdk/python/pyproject.toml
+sdk/python/examples/batch_multi_gpu.py
+sdk/python/examples/interactive_debug.py
+sdk/python/examples/quickstart.ipynb
+sdk/python/examples/run_tests.py
+sdk/python/examples/submit_job.py
+sdk/python/src/gpu_dev/__init__.py
+sdk/python/src/gpu_dev/py.typed
+sdk/python/src/gpu_dev/_async/__init__.py
+sdk/python/src/gpu_dev/_backend/__init__.py
+sdk/python/src/gpu_dev/_backend/aws.py
+sdk/python/src/gpu_dev/_backend/protocol.py
+sdk/python/src/gpu_dev/_sync/__init__.py
+sdk/python/src/gpu_dev/_sync/client.py
+sdk/python/src/gpu_dev/_sync/sandbox.py
+sdk/python/src/gpu_dev/_transport/__init__.py
+sdk/python/src/gpu_dev/_transport/ssh.py
+sdk/python/src/gpu_dev/common/__init__.py
+sdk/python/src/gpu_dev/common/config.py
+sdk/python/src/gpu_dev/common/enums.py
+sdk/python/src/gpu_dev/common/errors.py
+sdk/python/src/gpu_dev/common/models.py
+sdk/python/tests/__init__.py
+sdk/python/tests/test_models.py
 terraform-gpu-devservers/.terraform.lock.hcl
 terraform-gpu-devservers/README.md
 terraform-gpu-devservers/alb.tf

{gpu_dev-0.6.0 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/auth.py RENAMED Viewed

@@ -13,7 +13,7 @@ from rich.spinner import Spinner
 # SSH validation result is cached locally for 24h. New keys pushed to GitHub still take effect
 # at reservation time (pods fetch live keys via init container) — caching only skips the
 # pre-flight "are you who you say you are" check.
-_SSH_CACHE_TTL_SECONDS = 24 * 60 * 60
+_SSH_CACHE_TTL_SECONDS = 14 * 24 * 60 * 60
 _SSH_CACHE_PATH = Path(os.path.expanduser("~/.config/gpu-dev/ssh-validation-cache.json"))
 # Cache for authenticate_user. STS GetCallerIdentity is stable per AWS profile and slow under SSO

{gpu_dev-0.6.0 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/cli.py RENAMED Viewed

@@ -41,33 +41,51 @@ from .interactive import (
 console = Console()
+_east1_table = None
 def _fetch_reservations_cross_region(reservation_mgr, user_filter, statuses, config=None):
     """Fetch reservations from current region + prod-east1 if on prod."""
-    reservations = reservation_mgr.list_reservations(
-        user_filter=user_filter, statuses_to_include=statuses)
-    # Cross-region fetch
-    try:
+    global _east1_table
+    from concurrent.futures import ThreadPoolExecutor
+    def _fetch_primary():
+        return reservation_mgr.list_reservations(
+            user_filter=user_filter, statuses_to_include=statuses)
+    def _fetch_east1():
+        global _east1_table
         cfg = config or load_config()
-        if cfg.user_config.get("environment") == "prod":
-            east1_env = Config.ENVIRONMENTS.get("prod-east1", {})
-            if east1_env:
-                import boto3 as _b3
-                east1_ddb = _b3.resource("dynamodb", region_name=east1_env["region"])
-                east1_table = east1_ddb.Table("pytorch-gpu-dev-reservations")
-                for st in (statuses or ["active"]):
-                    resp = east1_table.query(
-                        IndexName="StatusIndex",
-                        KeyConditionExpression="#s = :status",
-                        ExpressionAttributeNames={"#s": "status"},
-                        ExpressionAttributeValues={":status": st},
-                    )
-                    for item in resp.get("Items", []):
-                        if user_filter and item.get("user_id") != user_filter:
-                            continue
-                        item["_region"] = "us-east-1"
-                        reservations.append(item)
+        if cfg.user_config.get("environment") != "prod":
+            return []
+        east1_env = Config.ENVIRONMENTS.get("prod-east1", {})
+        if not east1_env or not user_filter:
+            return []
+        if _east1_table is None:
+            _east1_table = cfg.session.resource(
+                "dynamodb", region_name=east1_env["region"]
+            ).Table("pytorch-gpu-dev-reservations")
+        results = []
+        for st in (statuses or ["active"]):
+            resp = _east1_table.query(
+                IndexName="UserStatusIndex",
+                KeyConditionExpression="user_id = :uid AND #s = :status",
+                ExpressionAttributeNames={"#s": "status"},
+                ExpressionAttributeValues={":uid": user_filter, ":status": st},
+            )
+            for item in resp.get("Items", []):
+                item["_region"] = "us-east-1"
+                results.append(item)
+        return results
+    try:
+        with ThreadPoolExecutor(max_workers=2) as ex:
+            f1 = ex.submit(_fetch_primary)
+            f2 = ex.submit(_fetch_east1)
+            reservations = f1.result()
+            reservations.extend(f2.result())
     except Exception:
-        pass
+        reservations = _fetch_primary()
     return reservations
@@ -608,6 +626,8 @@ def main(ctx: click.Context) -> None:
 )
 @click.option("--spot", is_flag=True, default=False,
               help="Acknowledge spot instance (~1/3 cost, may be preempted with 2-min notice). Required for spot-only types.")
+@click.option("--fast-cache", is_flag=True, default=False, hidden=True,
+              help="Use NVMe local cache for faster session restore (experimental).")
 @click.pass_context
 def reserve(
     ctx: click.Context,
@@ -629,6 +649,7 @@ def reserve(
     disk: Optional[str],
     node_label: tuple,
     spot: bool = False,
+    fast_cache: bool = False,
 ) -> None:
     """Reserve GPU development server(s)
@@ -746,7 +767,10 @@ def reserve(
                     else:
                         f_ssh = ex.submit(validate_ssh_key_matches_github_user, config, None)
                         ssh_result = None
-                    f_avail = ex.submit(reservation_mgr.get_gpu_availability_by_type)
+                    # Only fetch availability if we need the interactive picker
+                    need_interactive = gpu_type is None
+                    if need_interactive:
+                        f_avail = ex.submit(reservation_mgr.get_gpu_availability_by_type)
                     # Surface auth failure first (most actionable).
                     try:
@@ -758,7 +782,7 @@ def reserve(
                     if ssh_result is None:
                         ssh_result = f_ssh.result()
-                    availability_info = f_avail.result()
+                    availability_info = f_avail.result() if need_interactive else None
             # Surface SSH validation failure with the same UX as before.
             if not ssh_result.get("valid"):
@@ -1108,11 +1132,13 @@ def reserve(
                     rprint(f"[red]❌ {str(e)}[/red]")
                     return
-                # Validate SSH key matches configured GitHub username
-                live.update(Spinner("dots", text="🔐 Validating SSH key..."))
+                # Validate SSH key matches configured GitHub username (cached, ~0ms)
                 if not _validate_ssh_key_or_exit(config, live):
                     return
+                live.update(Spinner("dots", text="📡 Preparing reservation..."))
+                reservation_mgr = ReservationManager(config)
                 # Track if user explicitly requests no persistent disk
                 explicit_no_disk = explicit_no_disk_from_param
@@ -1166,8 +1192,10 @@ def reserve(
                                 # Build choices
                                 choices = []
-                                # Get available disks (exclude in-use and deleted disks)
-                                available_disks = [d for d in existing_disks if not d['in_use'] and not d.get('is_deleted', False)]
+                                # Show all non-deleted disks, marking in-use ones as disabled
+                                all_disks = [d for d in existing_disks if not d.get('is_deleted', False)]
+                                available_disks = [d for d in all_disks if not d['in_use']]
+                                in_use_disks = [d for d in all_disks if d['in_use']]
                                 if available_disks:
                                     choices.append(questionary.Separator("=== Available Disks ==="))
@@ -1178,6 +1206,17 @@ def reserve(
                                             value=("select", d['name'])
                                         ))
+                                if in_use_disks:
+                                    choices.append(questionary.Separator("=== In Use ==="))
+                                    for d in in_use_disks:
+                                        res_id = d.get('reservation_id', '?')[:8]
+                                        display = f"{d['name']} ({d['size_gb']}GB) — in use by {res_id}"
+                                        choices.append(questionary.Choice(
+                                            title=display,
+                                            value=("in_use", d['name']),
+                                            disabled="currently in use",
+                                        ))
                                 choices.append(questionary.Separator("=== Options ==="))
                                 choices.append(questionary.Choice(
                                     title="Create a new disk",
@@ -1224,11 +1263,6 @@ def reserve(
                                 rprint(f"[yellow]Use a different disk or wait for the reservation to end[/yellow]")
                                 return
-                live.update(
-                    Spinner("dots", text="📡 Setting up reservation manager...")
-                )
-                reservation_mgr = ReservationManager(config)
             # Submit reservation request
             live.update(
                 Spinner("dots", text="📡 Submitting reservation request...")
@@ -1364,6 +1398,7 @@ def reserve(
                     spot=spot,
                     node_labels=node_labels if node_labels else None,
                     trace=trace,
+                    fast_cache=fast_cache,
                 )
                 reservation_ids = [reservation_id] if reservation_id else None
@@ -2887,36 +2922,42 @@ def _show_availability() -> None:
         ) as live:
             config = load_config()
-            # Authenticate using AWS credentials
+            # Authenticate and fetch availability (both regions in parallel)
             try:
                 user_info = authenticate_user(config)
                 reservation_mgr = ReservationManager(config)
-                availability_info = reservation_mgr.get_gpu_availability_by_type()
+                from concurrent.futures import ThreadPoolExecutor
+                _env_name = config.user_config.get("environment", "prod")
+                _east1_spot_types = frozenset(Config.ENVIRONMENTS.get("prod-east1", {}).get("spot_types", []))
+                def _fetch_east1_spot():
+                    if _env_name != "prod" or not _east1_spot_types:
+                        return {}
+                    east1_r = Config.ENVIRONMENTS["prod-east1"]["region"]
+                    east1_table = config.session.resource("dynamodb", region_name=east1_r).Table("pytorch-gpu-dev-gpu-availability")
+                    result = {}
+                    for item in east1_table.scan().get("Items", []):
+                        gt = item.get("gpu_type", "")
+                        if gt in _east1_spot_types:
+                            result[gt] = {
+                                "available": int(item.get("available_gpus", 0)),
+                                "total": int(item.get("total_gpus", 0)),
+                                "max_reservable": int(item.get("max_reservable", 0)),
+                                "spot_info": item.get("spot_info", {}),
+                            }
+                    return result
+                with ThreadPoolExecutor(max_workers=2) as ex:
+                    f_avail = ex.submit(reservation_mgr.get_gpu_availability_by_type)
+                    f_spot = ex.submit(_fetch_east1_spot)
+                    availability_info = f_avail.result()
+                    spot_region_info = f_spot.result()
             except RuntimeError as e:
                 live.stop()
                 rprint(f"[red]❌ {str(e)}[/red]")
                 return
-        # Cross-region: fetch spot availability from prod-east1
-        spot_region_info = {}
-        _env_name = config.user_config.get("environment", "prod")
-        _east1_spot_types = frozenset(Config.ENVIRONMENTS.get("prod-east1", {}).get("spot_types", []))
-        if _env_name == "prod" and _east1_spot_types:
-            try:
-                import boto3 as _b3
-                east1_r = Config.ENVIRONMENTS["prod-east1"]["region"]
-                for item in _b3.resource("dynamodb", region_name=east1_r).Table("pytorch-gpu-dev-gpu-availability").scan().get("Items", []):
-                    gt = item.get("gpu_type", "")
-                    if gt in _east1_spot_types:
-                        spot_region_info[gt] = {
-                            "available": int(item.get("available_gpus", 0)),
-                            "total": int(item.get("total_gpus", 0)),
-                            "max_reservable": int(item.get("max_reservable", 0)),
-                            "spot_info": item.get("spot_info", {}),
-                        }
-            except Exception:
-                pass
         if availability_info:
             # GPU architecture mapping (for display)
             gpu_architectures = {
@@ -3273,8 +3314,28 @@ def connect(ctx: click.Context, reservation_id: Optional[str]) -> None:
     For VS Code Remote or manual SSH, use 'gpu-dev show' to see full SSH command.
     """
     import subprocess
+    from pathlib import Path
     try:
+        # Fast path: if reservation ID given, check local SSH config first (no network)
+        if reservation_id:
+            ssh_config_dir = Path.home() / ".gpu-dev"
+            config_file = ssh_config_dir / f"{reservation_id[:8]}-sshconfig"
+            if config_file.exists():
+                config_text = config_file.read_text()
+                fqdn_line = [l.strip() for l in config_text.splitlines() if l.strip().startswith("HostName")]
+                if fqdn_line:
+                    fqdn = fqdn_line[0].split(None, 1)[1]
+                    pod_name = f"gpu-dev-{reservation_id[:8]}"
+                    rprint(f"[cyan]Connecting to {pod_name}...[/cyan]\n")
+                    import subprocess, sys
+                    sys.exit(subprocess.call([
+                        "ssh", "-o", "StrictHostKeyChecking=no", "-o", "UserKnownHostsFile=/dev/null",
+                        "-o", "ProxyCommand=gpu-dev-ssh-proxy %h %p",
+                        "-o", "ForwardAgent=yes",
+                        f"dev@{fqdn}",
+                    ]))
         with Live(
             Spinner("dots", text="📡 Fetching reservation details..."), console=console
         ) as live:
@@ -3504,7 +3565,9 @@ def connect(ctx: click.Context, reservation_id: Optional[str]) -> None:
     except KeyboardInterrupt:
         rprint("\n[yellow]Connection cancelled by user[/yellow]")
     except Exception as e:
+        import traceback
         rprint(f"[red]❌ Error: {str(e)}[/red]")
+        traceback.print_exc()
 @main.command(name="get-ssh-config")

{gpu_dev-0.6.0 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/config.py RENAMED Viewed

@@ -3,6 +3,7 @@
 import os
 import json
 import boto3
+import botocore.exceptions
 from pathlib import Path
 from typing import Dict, Any, Optional
@@ -72,17 +73,63 @@ class Config:
         self._sqs_client = None
         self._dynamodb = None
+    _CRED_CACHE = Path.home() / ".config" / "gpu-dev" / "aws-cred-cache.json"
     def _create_aws_session(self):
-        """Create AWS session with profile support"""
-        available_profiles = boto3.Session().available_profiles
-        if "gpu-dev" in available_profiles:
-            try:
-                session = boto3.Session(profile_name="gpu-dev")
-                session.get_credentials()
-                return session
-            except Exception:
-                pass
-        return boto3.Session()
+        """Create AWS session, caching resolved credentials to skip SSO resolution (~900ms)."""
+        import time as _time
+        # Try cached credentials first (avoids 900ms SSO resolution)
+        try:
+            if self._CRED_CACHE.exists():
+                cached = json.loads(self._CRED_CACHE.read_text())
+                if _time.time() < cached.get("expires", 0):
+                    return boto3.Session(
+                        aws_access_key_id=cached["access_key"],
+                        aws_secret_access_key=cached["secret_key"],
+                        aws_session_token=cached["token"],
+                        region_name=self.aws_region,
+                    )
+        except Exception:
+            pass
+        # Resolve credentials from SSO/profile (slow path, ~900ms)
+        try:
+            session = boto3.Session(profile_name="gpu-dev")
+            creds = session.get_credentials()
+            if not creds:
+                raise Exception("no credentials")
+        except Exception:
+            session = boto3.Session()
+            creds = session.get_credentials()
+        # Cache resolved credentials (safe — they're short-lived STS tokens)
+        try:
+            frozen = creds.get_frozen_credentials()
+            if frozen.token:
+                self._CRED_CACHE.parent.mkdir(parents=True, exist_ok=True)
+                self._CRED_CACHE.write_text(json.dumps({
+                    "access_key": frozen.access_key,
+                    "secret_key": frozen.secret_key,
+                    "token": frozen.token,
+                    "expires": _time.time() + 2700,  # cache 45min (SSO tokens last ~1h)
+                }))
+                self._CRED_CACHE.chmod(0o600)
+        except Exception:
+            pass
+        return session
+    def refresh_session(self):
+        """Clear cached credentials and re-resolve. Called on ExpiredTokenException."""
+        try:
+            self._CRED_CACHE.unlink(missing_ok=True)
+        except Exception:
+            pass
+        self.session = self._create_aws_session()
+        self._sts_client = None
+        self._sqs_client = None
+        self._dynamodb = None
     @property
     def sts_client(self):

{gpu_dev-0.6.0 → gpu_dev-0.6.3}/cli-tools/gpu-dev-cli/gpu_dev_cli/reservations.py RENAMED Viewed

@@ -23,6 +23,8 @@ from .name_generator import sanitize_name
 def _spot_stage_number(status: str) -> tuple:
     """Map a spot provisioning status message to a numbered step (N, total)."""
     s = status.lower()
+    if "no spot capacity" in s or "no capacity" in s:
+        return 1, 7  # stuck at step 1, but message itself says why
     if "requested" in s or "waiting for aws" in s or "allocate capacity" in s:
         return 1, 7
     if "allocated" in s or "launching" in s or "booting" in s:
@@ -424,6 +426,18 @@ class ReservationManager:
         self.reservations_table = config.dynamodb.Table(
             config.reservations_table)
+    def _retry_on_expired(self, fn):
+        """Call fn, auto-refresh credentials on ExpiredTokenException."""
+        try:
+            return fn()
+        except Exception as e:
+            if "ExpiredToken" in str(type(e).__name__) or "expired" in str(e).lower():
+                self.config.refresh_session()
+                self.reservations_table = self.config.dynamodb.Table(
+                    self.config.reservations_table)
+                return fn()
+            raise
     def create_reservation(
         self,
         user_id: str,
@@ -442,6 +456,7 @@ class ReservationManager:
         node_labels: Optional[Dict[str, str]] = None,
         trace: bool = False,
         spot: bool = False,
+        fast_cache: bool = False,
     ) -> Optional[str]:
         """Create a new GPU reservation"""
         try:
@@ -524,6 +539,9 @@ class ReservationManager:
             if spot:
                 message["spot"] = True
+            if fast_cache:
+                message["fast_cache"] = True
             # Add trace flag and CLI start timestamp
             if trace:
                 message["trace"] = True
@@ -801,20 +819,21 @@ class ReservationManager:
         For multi-node reservations, returns info for all nodes in the group.
         """
         try:
-            # Query by user first (efficient), then filter by reservation_id prefix
+            # Short ID prefix — query UserIndex with server-side filter
             response = self.reservations_table.query(
                 IndexName="UserIndex",
                 KeyConditionExpression="user_id = :user_id",
-                ExpressionAttributeValues={":user_id": user_id},
+                FilterExpression="begins_with(reservation_id, :rid)",
+                ExpressionAttributeValues={":user_id": user_id, ":rid": reservation_id},
             )
             all_reservations = response.get("Items", [])
-            # Handle pagination for UserIndex query
             while "LastEvaluatedKey" in response:
                 response = self.reservations_table.query(
                     IndexName="UserIndex",
                     KeyConditionExpression="user_id = :user_id",
-                    ExpressionAttributeValues={":user_id": user_id},
+                    FilterExpression="begins_with(reservation_id, :rid)",
+                    ExpressionAttributeValues={":user_id": user_id, ":rid": reservation_id},
                     ExclusiveStartKey=response["LastEvaluatedKey"]
                 )
                 all_reservations.extend(response.get("Items", []))
@@ -1078,9 +1097,16 @@ class ReservationManager:
                 )
                 all_items.extend(response.get("Items", []))
+            # Fetch queue lengths for all GPU types in parallel
+            from concurrent.futures import ThreadPoolExecutor
+            gpu_types_list = [item["gpu_type"] for item in all_items]
+            with ThreadPoolExecutor(max_workers=10) as ex:
+                queue_futures = {gt: ex.submit(self._get_queue_length_for_gpu_type, gt) for gt in gpu_types_list}
+                queue_lengths = {gt: f.result() for gt, f in queue_futures.items()}
             for item in all_items:
                 gpu_type = item["gpu_type"]
-                queue_length = self._get_queue_length_for_gpu_type(gpu_type)
+                queue_length = queue_lengths.get(gpu_type, 0)
                 estimated_wait = queue_length * 15 if queue_length > 0 else 0
                 # size_etas is a DDB Map of {size_str: epoch_seconds (Decimal)} — pass through
@@ -1210,7 +1236,6 @@ class ReservationManager:
         try:
             total_count = 0
-            # Count queued reservations for this GPU type
             for status in ["queued", "pending"]:
                 try:
                     response = self.reservations_table.query(
@@ -1221,10 +1246,10 @@ class ReservationManager:
                             ":status": status,
                             ":gpu_type": gpu_type,
                         },
+                        Select="COUNT",
                     )
-                    total_count += len(response.get("Items", []))
+                    total_count += response.get("Count", 0)
-                    # Handle pagination for StatusGpuTypeIndex query
                     while "LastEvaluatedKey" in response:
                         response = self.reservations_table.query(
                             IndexName="StatusGpuTypeIndex",
@@ -1234,9 +1259,10 @@ class ReservationManager:
                                 ":status": status,
                                 ":gpu_type": gpu_type,
                             },
+                            Select="COUNT",
                             ExclusiveStartKey=response["LastEvaluatedKey"]
                         )
-                        total_count += len(response.get("Items", []))
+                        total_count += response.get("Count", 0)
                 except Exception as query_error:
                     # Fallback to scanning if the composite index doesn't exist yet
                     console.print(
@@ -1904,9 +1930,12 @@ class ReservationManager:
                                 detailed = first_queued.get("current_detailed_status", "")
                                 # Spot stages come through current_detailed_status — show as
                                 # numbered steps so users see progress and don't give up.
-                                if detailed and ("spot" in detailed.lower() or "node" in detailed.lower() or "instance" in detailed.lower()):
+                                if detailed and ("spot" in detailed.lower() or "node" in detailed.lower() or "instance" in detailed.lower() or "capacity" in detailed.lower()):
                                     step, total = _spot_stage_number(detailed)
-                                    message = f"⏳ Step {step}/{total}: {detailed}"
+                                    if "no spot capacity" in detailed.lower() or "no capacity" in detailed.lower():
+                                        message = f"⚠️  {detailed}"
+                                    else:
+                                        message = f"⏳ Step {step}/{total}: {detailed}"
                                 elif is_multinode:
                                     total_gpus = sum(
                                         node["gpu_count"] for node in node_details if node["reservation"])

{gpu_dev-0.6.0 → gpu_dev-0.6.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "gpu-dev"
-version = "0.6.0"
+version = "0.6.3"
 description = "CLI tool for PyTorch GPU developer server reservations"
 authors = [{name = "PyTorch Team"}]
 readme = "cli-tools/gpu-dev-cli/README.md"

gpu-dev 0.6.0__tar.gz → 0.6.3__tar.gz

gpu-dev 0.6.0__tar.gz → 0.6.3__tar.gz