PyPI - podstack - Versions diffs - 1.2.0__py3-none-any.whl - Mend

podstack 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

podstack/__init__.py +222 -0
podstack/annotations.py +725 -0
podstack/client.py +322 -0
podstack/exceptions.py +125 -0
podstack/execution.py +291 -0
podstack/gpu_runner.py +1141 -0
podstack/models.py +274 -0
podstack/notebook.py +410 -0
podstack/registry/__init__.py +402 -0
podstack/registry/client.py +957 -0
podstack/registry/exceptions.py +107 -0
podstack/registry/experiment.py +227 -0
podstack/registry/model.py +273 -0
podstack/registry/model_utils.py +231 -0
podstack-1.2.0.dist-info/METADATA +299 -0
podstack-1.2.0.dist-info/RECORD +27 -0
podstack-1.2.0.dist-info/WHEEL +5 -0
podstack-1.2.0.dist-info/licenses/LICENSE +21 -0
podstack-1.2.0.dist-info/top_level.txt +2 -0
podstack_gpu/__init__.py +126 -0
podstack_gpu/app.py +675 -0
podstack_gpu/exceptions.py +35 -0
podstack_gpu/image.py +325 -0
podstack_gpu/runner.py +746 -0
podstack_gpu/secret.py +189 -0
podstack_gpu/utils.py +203 -0
podstack_gpu/volume.py +198 -0

podstack_gpu/secret.py ADDED Viewed

@@ -0,0 +1,189 @@
+"""Podstack Secret - Secure credential management for GPU functions."""
+from __future__ import annotations
+import os
+from typing import Optional, Dict
+from dataclasses import dataclass
+@dataclass
+class SecretConfig:
+    """Plain record describing one secret: its name and its environment variable."""
+    # Identifier of the secret.
+    name: str
+    # Environment variable the secret is injected as; None when none was given.
+    env_var: Optional[str] = None
+    def to_dict(self) -> dict:
+        """Return the fields as a dict with the keys "name" and "env_var"."""
+        payload = {"name": self.name}
+        payload["env_var"] = self.env_var
+        return payload
+class Secret:
+    """
+    Secure secrets that can be injected into GPU functions.
+    Secrets are stored encrypted and only decrypted at runtime.
+    They can be injected as environment variables or accessed via the API.
+    This class itself only records which secret to use: it wraps a
+    SecretConfig (secret name plus environment variable name) and serializes
+    it with to_dict(). It performs no network, environment or file I/O.
+    Example:
+        # Reference a secret stored in Podstack
+        hf_token = podstack.Secret.from_name("huggingface-token")
+        @app.function(gpu="H100", secrets=[hf_token])
+        def train():
+            import os
+            # Secret is available as environment variable
+            token = os.environ["HUGGINGFACE_TOKEN"]
+        # Or inject from environment at deploy time
+        api_key = podstack.Secret.from_local_env("OPENAI_API_KEY")
+    """
+    def __init__(self, config: SecretConfig):
+        """
+        Wrap an existing configuration.
+        Args:
+            config: Name / environment-variable pair describing the secret
+        """
+        self._config = config
+        # Filled only by from_dict(); never part of to_dict().
+        self._values: Dict[str, str] = {}
+        # Recorded by from_name(); never part of to_dict().
+        self._required: bool = True
+    @classmethod
+    def from_name(
+        cls,
+        name: str,
+        environment_variable: Optional[str] = None,
+        required: bool = True,
+    ) -> "Secret":
+        """
+        Reference a secret stored in Podstack by name.
+        Args:
+            name: Secret name in Podstack
+            environment_variable: Env var name to inject as (defaults to the
+                name uppercased with "-" replaced by "_")
+            required: If True, fail if secret doesn't exist. The flag is only
+                recorded on the returned object (see the required property);
+                no existence check happens in this method.
+        Returns:
+            A Secret wrapping the name and the resolved env var name
+        Example:
+            secret = podstack.Secret.from_name(
+                "huggingface-token",
+                environment_variable="HF_TOKEN"
+            )
+        """
+        env_var = environment_variable or name.upper().replace("-", "_")
+        secret = cls(SecretConfig(name=name, env_var=env_var))
+        # Set after construction so the constructor signature stays unchanged.
+        secret._required = required
+        return secret
+    @classmethod
+    def from_local_env(
+        cls,
+        env_var: str,
+        remote_name: Optional[str] = None,
+    ) -> "Secret":
+        """
+        Create a secret from a local environment variable.
+        The value is read at deploy time and stored securely; this method
+        does not read the variable itself, it only records the names.
+        Args:
+            env_var: Local environment variable name
+            remote_name: Name to use remotely (defaults to env_var)
+        Returns:
+            A Secret named remote_name (or env_var) that injects as env_var
+        Example:
+            # Read OPENAI_API_KEY from local env and inject into function
+            secret = podstack.Secret.from_local_env("OPENAI_API_KEY")
+        """
+        return cls(SecretConfig(
+            name=remote_name or env_var,
+            env_var=env_var,
+        ))
+    @classmethod
+    def from_dict(cls, secrets: Dict[str, str]) -> "Secret":
+        """
+        Create a secret from a dictionary (for testing/development).
+        WARNING: Do not use in production! Values will be in code.
+        The pairs are copied onto the returned object and can be read back
+        through the values property. They are not included in to_dict().
+        Args:
+            secrets: Dictionary of secret key-value pairs
+        Returns:
+            A Secret named "_dict_secret" with no env var set
+        Example:
+            # Only for local testing!
+            secret = podstack.Secret.from_dict({"API_KEY": "test-key"})
+        """
+        secret = cls(SecretConfig(name="_dict_secret"))
+        # Copy so later changes to the caller's dict do not leak in; an empty
+        # or missing mapping is tolerated, as it was before.
+        secret._values = dict(secrets) if secrets else {}
+        return secret
+    @classmethod
+    def from_dotenv(cls, path: str = ".env") -> "Secret":
+        """
+        Load secrets from a .env file.
+        This method does not open the file; it only encodes the path in the
+        secret name as "_dotenv:<path>".
+        Args:
+            path: Path to .env file
+        Returns:
+            A Secret named "_dotenv:<path>" with no env var set
+        Example:
+            secrets = podstack.Secret.from_dotenv(".env.production")
+        """
+        return cls(SecretConfig(name=f"_dotenv:{path}"))
+    @property
+    def name(self) -> str:
+        """Get the secret name."""
+        return self._config.name
+    @property
+    def env_var(self) -> Optional[str]:
+        """Get the environment variable name (None for from_dict/from_dotenv secrets)."""
+        return self._config.env_var
+    @property
+    def required(self) -> bool:
+        """Get the required flag given to from_name() (True for other constructors)."""
+        return self._required
+    @property
+    def values(self) -> Dict[str, str]:
+        """Get a copy of the pairs given to from_dict() (empty otherwise)."""
+        return dict(self._values)
+    def to_dict(self) -> dict:
+        """Convert to dictionary for API serialization (keys "name" and "env_var")."""
+        return self._config.to_dict()
+    def __repr__(self) -> str:
+        # Only the name is shown so that values never end up in logs.
+        return f"Secret(name={self._config.name!r})"
+class SecretDict:
+    """
+    A dictionary of secrets that can be accessed in GPU functions.
+    Lookups check the values held on the instance first and then the process
+    environment, using the variable "<NAME>_<KEY>" with both parts uppercased.
+    Example:
+        secrets = podstack.SecretDict.from_name("my-secrets")
+        @app.function(gpu="H100", secrets=[secrets])
+        def train():
+            # Access secrets by key
+            api_key = secrets["API_KEY"]
+    """
+    def __init__(self, name: str):
+        """
+        Args:
+            name: Secret dictionary name
+        """
+        self._name = name
+        # Explicit values win over the environment; nothing in this class fills it.
+        self._values: Dict[str, str] = {}
+    @classmethod
+    def from_name(cls, name: str) -> "SecretDict":
+        """
+        Reference a secret dictionary stored in Podstack.
+        Args:
+            name: Secret dictionary name
+        """
+        return cls(name)
+    def _env_key(self, key: str) -> str:
+        """Build the environment variable name used as the fallback for key."""
+        # NOTE(review): hyphens in the name are kept as-is here, unlike
+        # Secret.from_name which maps "-" to "_" — confirm what the runtime injects.
+        return f"{self._name.upper()}_{key.upper()}"
+    def __getitem__(self, key: str) -> str:
+        """
+        Get a secret value by key.
+        Returns the empty string (it does not raise KeyError) when the key is
+        neither held on the instance nor present in the environment.
+        """
+        if key in self._values:
+            return self._values[key]
+        # In remote execution, this would be populated
+        return os.environ.get(self._env_key(key), "")
+    def get(self, key: str, default: Optional[str] = None) -> Optional[str]:
+        """
+        Get a secret value with a default.
+        Args:
+            key: Secret key
+            default: Value returned when the key is unknown
+        Returns:
+            The stored or environment value, or default when there is none
+        """
+        # Look the key up directly: __getitem__ reports a missing key as ""
+        # and never raises, so delegating to it could never yield default.
+        if key in self._values:
+            return self._values[key]
+        return os.environ.get(self._env_key(key), default)
+    @property
+    def name(self) -> str:
+        """Get the secret dict name."""
+        return self._name
+    def __repr__(self) -> str:
+        return f"SecretDict(name={self._name!r})"

podstack_gpu/utils.py ADDED Viewed

@@ -0,0 +1,203 @@
+"""Podstack GPU Utilities - Helper functions for GPU operations."""
+import subprocess
+import os
+from typing import Optional, Dict, Any, List
+def nvidia_smi(format: str = "text") -> str:
+    """
+    Run nvidia-smi and return what it printed.
+    Failures are reported as a message in the returned string; nothing is raised.
+    Args:
+        format: Output format - "text" (default), "csv", or "xml".
+            Any other value is treated like "text".
+    Returns:
+        stdout when the command exits with 0, otherwise its stderr, or a
+        short message when the command is missing, times out or fails to run
+    Example:
+        @app.function(gpu="L40S")
+        def check_gpu():
+            from podstack import nvidia_smi
+            print(nvidia_smi())
+            return {"gpu_info": nvidia_smi("csv")}
+    """
+    try:
+        # Plain invocation unless a structured format was requested.
+        extra_args = []
+        if format == "csv":
+            extra_args = ["--query-gpu=name,memory.total,memory.used,memory.free,utilization.gpu,temperature.gpu", "--format=csv"]
+        elif format == "xml":
+            extra_args = ["-x", "-q"]
+        completed = subprocess.run(
+            ["nvidia-smi"] + extra_args,
+            capture_output=True,
+            text=True,
+            timeout=10,
+        )
+    except FileNotFoundError:
+        return "nvidia-smi not found. Make sure NVIDIA drivers are installed."
+    except subprocess.TimeoutExpired:
+        return "nvidia-smi timed out"
+    except Exception as e:
+        return f"Error running nvidia-smi: {e}"
+    else:
+        if completed.returncode == 0:
+            return completed.stdout
+        return completed.stderr
+def _int_or_raw(field: str):
+    """Return field as an int when it is all digits, otherwise the string unchanged."""
+    return int(field) if field.isdigit() else field
+def gpu_info() -> Dict[str, Any]:
+    """
+    Get GPU information as a dictionary.
+    Runs nvidia-smi with a CSV query and parses one row per GPU. Numeric
+    fields that are not plain digits (for example "[N/A]") are kept as strings.
+    Returns:
+        With exactly one GPU, a dictionary with:
+        - name: GPU model name
+        - memory_total: Total memory in MB
+        - memory_used: Used memory in MB
+        - memory_free: Free memory in MB
+        - utilization: GPU utilization percentage
+        - temperature: GPU temperature in Celsius
+        - count: Number of GPUs (1)
+        With zero or several GPUs, a dictionary with:
+        - count: Number of GPUs
+        - gpus: List of per-GPU dictionaries (the fields above, without count)
+        - name: Name of the first GPU, or "Unknown" when there is none
+        - memory_total / memory_used / memory_free: Sums over the GPUs,
+          skipping values that are not integers
+        On failure, a dictionary with a single "error" key.
+    Example:
+        @app.function(gpu="L40S")
+        def check_resources():
+            from podstack import gpu_info
+            info = gpu_info()
+            print(f"GPU: {info['name']}")
+            print(f"Memory: {info['memory_used']}/{info['memory_total']} MB")
+            return info
+    """
+    try:
+        result = subprocess.run(
+            ["nvidia-smi", "--query-gpu=name,memory.total,memory.used,memory.free,utilization.gpu,temperature.gpu,count", "--format=csv,noheader,nounits"],
+            capture_output=True, text=True, timeout=10
+        )
+        if result.returncode != 0:
+            return {"error": result.stderr}
+        gpus = []
+        for line in result.stdout.strip().split("\n"):
+            parts = [p.strip() for p in line.split(",")]
+            # Skip blank or truncated rows; the trailing count column is not read.
+            if len(parts) < 6:
+                continue
+            gpus.append({
+                "name": parts[0],
+                "memory_total": _int_or_raw(parts[1]),
+                "memory_used": _int_or_raw(parts[2]),
+                "memory_free": _int_or_raw(parts[3]),
+                "utilization": _int_or_raw(parts[4]),
+                "temperature": _int_or_raw(parts[5]),
+            })
+        if len(gpus) == 1:
+            # Flat shape for the common single-GPU case. "count" is included
+            # so the key is present in every successful result.
+            return {**gpus[0], "count": 1}
+        return {
+            "count": len(gpus),
+            "gpus": gpus,
+            "name": gpus[0]["name"] if gpus else "Unknown",
+            "memory_total": sum(g["memory_total"] for g in gpus if isinstance(g["memory_total"], int)),
+            "memory_used": sum(g["memory_used"] for g in gpus if isinstance(g["memory_used"], int)),
+            "memory_free": sum(g["memory_free"] for g in gpus if isinstance(g["memory_free"], int)),
+        }
+    except FileNotFoundError:
+        return {"error": "nvidia-smi not found"}
+    except Exception as e:
+        # Best effort: timeouts and other failures are reported, not raised.
+        return {"error": str(e)}
+def cuda_available() -> bool:
+    """
+    Check if CUDA is available.
+    Asks torch when it can be imported. Otherwise falls back to running
+    nvidia-smi and treats a zero exit status as "available".
+    Returns:
+        True if CUDA is available, False otherwise
+    Example:
+        @app.function(gpu="L40S")
+        def train():
+            from podstack import cuda_available
+            if cuda_available():
+                print("CUDA is ready!")
+            else:
+                print("Warning: CUDA not available")
+    """
+    try:
+        import torch
+        return torch.cuda.is_available()
+    except ImportError:
+        # Fallback to checking nvidia-smi
+        try:
+            result = subprocess.run(["nvidia-smi"], capture_output=True, timeout=5)
+            return result.returncode == 0
+        except Exception:
+            # Missing binary, timeout, etc. all mean "not available".
+            # Exception (not a bare except) lets KeyboardInterrupt and
+            # SystemExit propagate instead of being reported as False.
+            return False
+def cuda_device_count() -> int:
+    """
+    Get the number of available CUDA devices.
+    Uses torch when it can be imported; otherwise derives the number from gpu_info().
+    Returns:
+        Number of CUDA devices (0 when the fallback reports an error)
+    Example:
+        @app.function(gpu="A100-80G", count=4)
+        def distributed_train():
+            from podstack import cuda_device_count
+            print(f"Training on {cuda_device_count()} GPUs")
+    """
+    try:
+        import torch
+        return torch.cuda.device_count()
+    except ImportError:
+        # No torch: fall back to the nvidia-smi based summary.
+        summary = gpu_info()
+        if "error" in summary:
+            return 0
+        # A result without "count" is treated as one device.
+        return summary.get("count", 1)
+def _format_mb(value: Any) -> str:
+    """Format a memory figure: ints get thousands separators, anything else is shown as-is."""
+    # The "," format option is rejected for strings, and gpu_info() keeps
+    # non-numeric readings such as "[N/A]" as strings.
+    return f"{value:,}" if isinstance(value, int) else str(value)
+def print_gpu_status():
+    """
+    Print a formatted GPU status summary.
+    Reads gpu_info() and prints either a per-GPU listing (several GPUs) or a
+    single summary (one GPU) to stdout. When gpu_info() reports an error,
+    only the header and the error message are printed.
+    Example:
+        @app.function(gpu="L40S")
+        def my_function():
+            from podstack import print_gpu_status
+            print_gpu_status()
+            # ... your code ...
+    """
+    print("=" * 60)
+    print("GPU STATUS")
+    print("=" * 60)
+    info = gpu_info()
+    if "error" in info:
+        print(f"Error: {info['error']}")
+        return
+    if "gpus" in info:
+        # Multiple GPUs
+        print(f"Total GPUs: {info['count']}")
+        print(f"Total Memory: {_format_mb(info['memory_used'])} / {_format_mb(info['memory_total'])} MB")
+        print("-" * 60)
+        for i, gpu in enumerate(info["gpus"]):
+            print(f"GPU {i}: {gpu['name']}")
+            # Per-GPU fields may be strings, so format them defensively.
+            print(f"  Memory: {_format_mb(gpu['memory_used'])} / {_format_mb(gpu['memory_total'])} MB ({_format_mb(gpu['memory_free'])} MB free)")
+            print(f"  Utilization: {gpu['utilization']}%")
+            print(f"  Temperature: {gpu['temperature']}°C")
+    else:
+        # Single GPU
+        print(f"GPU: {info.get('name', 'Unknown')}")
+        mem_used = info.get('memory_used', 0)
+        mem_total = info.get('memory_total', 0)
+        mem_free = info.get('memory_free', 0)
+        # The memory line is only shown when used/total are numeric; the
+        # free figure is formatted defensively because it may still be a string.
+        if isinstance(mem_used, int) and isinstance(mem_total, int):
+            print(f"Memory: {_format_mb(mem_used)} / {_format_mb(mem_total)} MB ({_format_mb(mem_free)} MB free)")
+        print(f"Utilization: {info.get('utilization', 'N/A')}%")
+        print(f"Temperature: {info.get('temperature', 'N/A')}°C")
+    print("=" * 60)

podstack_gpu/volume.py ADDED Viewed

@@ -0,0 +1,198 @@
+"""Podstack Volume - Persistent storage for GPU functions."""
+from __future__ import annotations
+import os
+from typing import Optional
+from dataclasses import dataclass
+@dataclass
+class VolumeConfig:
+    """Plain record describing a volume: name, size and optional region."""
+    # Identifier of the volume.
+    name: str
+    # Requested size in gigabytes.
+    size_gb: int = 10
+    # Region for the volume; None when not specified.
+    region: Optional[str] = None
+    def to_dict(self) -> dict:
+        """Return the fields as a dict with the keys "name", "size_gb" and "region"."""
+        payload = {"name": self.name}
+        payload["size_gb"] = self.size_gb
+        payload["region"] = self.region
+        return payload
+class Volume:
+    """
+    Persistent network storage that can be attached to GPU functions.
+    Volumes persist data across function invocations and can be shared
+    between functions.
+    This class only holds a VolumeConfig and serializes it with to_dict();
+    none of its methods contact a backend.
+    Example:
+        # Create a volume
+        model_volume = podstack.Volume.from_name("model-cache", create_if_missing=True)
+        @app.function(gpu="H100", volumes={"/models": model_volume})
+        def train():
+            # Save models to /models - they persist!
+            torch.save(model, "/models/checkpoint.pt")
+        @app.function(gpu="H100", volumes={"/models": model_volume})
+        def inference():
+            # Load the model saved during training
+            model = torch.load("/models/checkpoint.pt")
+    """
+    def __init__(self, config: VolumeConfig):
+        """
+        Wrap an existing configuration.
+        Args:
+            config: Name / size / region describing the volume
+        """
+        # Starts False and is not updated anywhere in this class.
+        self._persisted = False
+        self._config = config
+    @classmethod
+    def from_name(
+        cls,
+        name: str,
+        create_if_missing: bool = True,
+        size_gb: int = 10,
+        region: str = None,
+    ) -> "Volume":
+        """
+        Get or create a volume by name.
+        Args:
+            name: Volume name (must be unique within project)
+            create_if_missing: Create the volume if it doesn't exist.
+                NOTE(review): accepted but not used by this method — confirm
+                where creation is meant to be decided.
+            size_gb: Size in GB (only used when creating)
+            region: Region for the volume (optional)
+        Returns:
+            A Volume wrapping the given name, size and region
+        Example:
+            volume = podstack.Volume.from_name("my-data", size_gb=100)
+        """
+        settings = VolumeConfig(name=name, size_gb=size_gb, region=region)
+        return cls(settings)
+    @classmethod
+    def ephemeral(cls, size_gb: int = 10) -> "Volume":
+        """
+        Create an ephemeral volume that only lasts for the function invocation.
+        Useful for temporary storage during execution. The volume gets a
+        generated name of the form "ephemeral-" plus 8 random hex characters.
+        Args:
+            size_gb: Size in GB
+        Returns:
+            A Volume with the generated name and no region
+        Example:
+            temp = podstack.Volume.ephemeral(size_gb=50)
+            @app.function(gpu="H100", volumes={"/scratch": temp})
+            def process():
+                # Use /scratch for temporary files
+                ...
+        """
+        import uuid
+        generated_name = f"ephemeral-{uuid.uuid4().hex[:8]}"
+        return cls(VolumeConfig(name=generated_name, size_gb=size_gb))
+    @property
+    def name(self) -> str:
+        """Name of the volume."""
+        return self._config.name
+    @property
+    def size_gb(self) -> int:
+        """Size of the volume in GB."""
+        return self._config.size_gb
+    def to_dict(self) -> dict:
+        """Serialize the wrapped config (keys "name", "size_gb", "region")."""
+        return self._config.to_dict()
+    def __repr__(self) -> str:
+        cfg = self._config
+        return f"Volume(name={cfg.name!r}, size_gb={cfg.size_gb})"
+class CloudBucketMount:
+    """
+    Mount a cloud storage bucket (S3, GCS, etc.) as a volume.
+    Instances only record the mount settings and serialize them with to_dict().
+    Example:
+        bucket = podstack.CloudBucketMount.from_s3(
+            bucket_name="my-training-data",
+            secret=podstack.Secret.from_name("aws-credentials"),
+        )
+        @app.function(gpu="H100", volumes={"/data": bucket})
+        def train():
+            # Access S3 data at /data
+            data = load_data("/data/dataset.parquet")
+    """
+    def __init__(
+        self,
+        bucket_name: str,
+        provider: str,
+        secret: "Secret" = None,
+        read_only: bool = False,
+        prefix: str = None,
+    ):
+        """
+        Args:
+            bucket_name: Name of the bucket
+            provider: Storage provider tag ("s3" and "gcs" are the ones used here)
+            secret: Secret holding the credentials (optional)
+            read_only: Mount as read-only
+            prefix: Only mount objects with this prefix (optional)
+        """
+        self._provider = provider
+        self._bucket_name = bucket_name
+        self._prefix = prefix
+        self._read_only = read_only
+        self._secret = secret
+    @classmethod
+    def from_s3(
+        cls,
+        bucket_name: str,
+        secret: "Secret" = None,
+        read_only: bool = False,
+        prefix: str = None,
+    ) -> "CloudBucketMount":
+        """
+        Mount an S3 bucket.
+        Args:
+            bucket_name: S3 bucket name
+            secret: Secret containing AWS credentials
+            read_only: Mount as read-only
+            prefix: Only mount objects with this prefix
+        Returns:
+            A mount whose provider is "s3"
+        """
+        return cls(
+            bucket_name=bucket_name,
+            provider="s3",
+            secret=secret,
+            read_only=read_only,
+            prefix=prefix,
+        )
+    @classmethod
+    def from_gcs(
+        cls,
+        bucket_name: str,
+        secret: "Secret" = None,
+        read_only: bool = False,
+        prefix: str = None,
+    ) -> "CloudBucketMount":
+        """
+        Mount a Google Cloud Storage bucket.
+        Args:
+            bucket_name: GCS bucket name
+            secret: Secret containing GCP credentials
+            read_only: Mount as read-only
+            prefix: Only mount objects with this prefix
+        Returns:
+            A mount whose provider is "gcs"
+        """
+        return cls(
+            bucket_name=bucket_name,
+            provider="gcs",
+            secret=secret,
+            read_only=read_only,
+            prefix=prefix,
+        )
+    def to_dict(self) -> dict:
+        """
+        Convert to dictionary for API serialization.
+        The secret is represented by its name only (None when no secret is set).
+        """
+        secret_name = None
+        if self._secret:
+            secret_name = self._secret.name
+        return dict(
+            type="cloud_bucket",
+            bucket_name=self._bucket_name,
+            provider=self._provider,
+            secret=secret_name,
+            read_only=self._read_only,
+            prefix=self._prefix,
+        )
+    def __repr__(self) -> str:
+        location = f"{self._provider}://{self._bucket_name}"
+        return f"CloudBucketMount({location})"