lattice-sub 1.1.3.tar.gz → 1.2.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/MANIFEST.in +1 -0
- {lattice_sub-1.1.3/src/lattice_sub.egg-info → lattice_sub-1.2.2}/PKG-INFO +71 -1
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/README.md +70 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/pyproject.toml +1 -1
- {lattice_sub-1.1.3 → lattice_sub-1.2.2/src/lattice_sub.egg-info}/PKG-INFO +71 -1
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/__init__.py +1 -1
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/batch.py +259 -8
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/cli.py +4 -1
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/config.py +7 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/core.py +14 -4
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/LICENSE +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/docs/images/example_comparison.png +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/docs/images/threshold_analysis.png +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/examples/config.yaml +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/examples/converted_params.yaml +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/setup.cfg +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_sub.egg-info/SOURCES.txt +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_sub.egg-info/dependency_links.txt +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_sub.egg-info/entry_points.txt +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_sub.egg-info/requires.txt +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_sub.egg-info/top_level.txt +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/io.py +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/masks.py +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/processing.py +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/threshold_optimizer.py +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/ui.py +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/visualization.py +0 -0

{lattice_sub-1.1.3/src/lattice_sub.egg-info → lattice_sub-1.2.2}/PKG-INFO +71 -1

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lattice-sub
-Version: 1.1.3
+Version: 1.2.2
 Summary: Lattice subtraction for cryo-EM micrographs - removes periodic crystal signals to reveal non-periodic features
 Author-email: George Stephenson <george.stephenson@colorado.edu>, Vignesh Kasinath <vignesh.kasinath@colorado.edu>
 License: MIT
@@ -172,6 +172,76 @@ python -c "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_av

 ---

+## Multi-GPU Support
+
+When processing batches on systems with multiple GPUs, files are automatically distributed across all available GPUs for faster processing. No extra flags needed!
+
+```bash
+# Automatically uses all available GPUs
+lattice-sub batch input_folder/ output_folder/ -p 0.56
+```
+
+**Example with 2 GPUs and 100 images:**
+- GPU 0: processes images 1-50
+- GPU 1: processes images 51-100
+- Single progress bar shows combined progress
+
+This provides near-linear speedup with additional GPUs.
+
+---
+
+## HPC Example (CU Boulder Alpine)
+
+Using [Open OnDemand Core Desktop](https://curc.readthedocs.io/en/latest/open_ondemand/core_desktop.html) with 2× RTX 8000 GPUs:
+
+```bash
+# Create environment
+module load anaconda
+conda create -n lattice_test python=3.11 -y
+conda activate lattice_test
+pip install lattice-sub
+
+# Process 100 micrographs
+lattice-sub batch input/ output/ -p 0.56
+```
+
+**Output:**
+```
+Phase-preserving FFT inpainting for cryo-EM | v1.2.2
+
+Configuration
+-------------
+Pixel size: 0.56 A
+Threshold: auto
+Backend: Auto → GPU (Quadro RTX 8000)
+
+Batch Processing
+----------------
+Files: 100
+Output: /projects/user/output
+Workers: 1
+
+✓ Using 2 GPUs: GPU 0, GPU 1
+
+✓ GPU 0: Quadro RTX 8000
+✓ GPU 1: Quadro RTX 8000
+
+Processing: 100%|█████████████████████████| 100/100 [05:12<00:00, 3.13s/file]
+
+[OK] Batch complete (312.9s)
+```
+
+**100 images processed in ~5 minutes** with automatic multi-GPU distribution.
+
+For compute-focused workloads, use Alpine's [GPU partitions](https://curc.readthedocs.io/en/latest/clusters/alpine/alpine-hardware.html) (A100, L40, MI100):
+
+```bash
+# Request 3 GPUs for 1 hour
+sinteractive --partition=aa100 --gres=gpu:3 --ntasks=16 --nodes=1 --time=01:00:00 --qos=normal
+```
+
+---
+
 ## Python API

 ```python

{lattice_sub-1.1.3 → lattice_sub-1.2.2}/README.md +70 -0

@@ -131,6 +131,76 @@ python -c "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_av

 ---

+## Multi-GPU Support
+
+When processing batches on systems with multiple GPUs, files are automatically distributed across all available GPUs for faster processing. No extra flags needed!
+
+```bash
+# Automatically uses all available GPUs
+lattice-sub batch input_folder/ output_folder/ -p 0.56
+```
+
+**Example with 2 GPUs and 100 images:**
+- GPU 0: processes images 1-50
+- GPU 1: processes images 51-100
+- Single progress bar shows combined progress
+
+This provides near-linear speedup with additional GPUs.
+
+---
+
+## HPC Example (CU Boulder Alpine)
+
+Using [Open OnDemand Core Desktop](https://curc.readthedocs.io/en/latest/open_ondemand/core_desktop.html) with 2× RTX 8000 GPUs:
+
+```bash
+# Create environment
+module load anaconda
+conda create -n lattice_test python=3.11 -y
+conda activate lattice_test
+pip install lattice-sub
+
+# Process 100 micrographs
+lattice-sub batch input/ output/ -p 0.56
+```
+
+**Output:**
+```
+Phase-preserving FFT inpainting for cryo-EM | v1.2.2
+
+Configuration
+-------------
+Pixel size: 0.56 A
+Threshold: auto
+Backend: Auto → GPU (Quadro RTX 8000)
+
+Batch Processing
+----------------
+Files: 100
+Output: /projects/user/output
+Workers: 1
+
+✓ Using 2 GPUs: GPU 0, GPU 1
+
+✓ GPU 0: Quadro RTX 8000
+✓ GPU 1: Quadro RTX 8000
+
+Processing: 100%|█████████████████████████| 100/100 [05:12<00:00, 3.13s/file]
+
+[OK] Batch complete (312.9s)
+```
+
+**100 images processed in ~5 minutes** with automatic multi-GPU distribution.
+
+For compute-focused workloads, use Alpine's [GPU partitions](https://curc.readthedocs.io/en/latest/clusters/alpine/alpine-hardware.html) (A100, L40, MI100):
+
+```bash
+# Request 3 GPUs for 1 hour
+sinteractive --partition=aa100 --gres=gpu:3 --ntasks=16 --nodes=1 --time=01:00:00 --qos=normal
+```
+
+---
+
 ## Python API

 ```python
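The "GPU 0: images 1-50 / GPU 1: images 51-100" split described in the README section above is plain chunked (ceiling-division) distribution. The sketch below is a standalone illustration only; `split_across_gpus` is a hypothetical helper name, and the package's real logic lives in `BatchProcessor._process_multi_gpu` in the batch.py diff further down.

```python
# Standalone sketch of chunked distribution across GPUs (not part of lattice-sub).

def split_across_gpus(files, gpu_ids):
    """Assign files to GPUs in contiguous chunks using ceiling division."""
    total = len(files)
    num_gpus = len(gpu_ids)
    chunk_size = (total + num_gpus - 1) // num_gpus  # ceiling division
    assignments = []
    for i, gpu_id in enumerate(gpu_ids):
        start, end = i * chunk_size, min((i + 1) * chunk_size, total)
        if start < total:
            assignments.append((gpu_id, files[start:end]))
    return assignments

files = [f"image_{n:03d}.mrc" for n in range(1, 101)]
for gpu_id, chunk in split_across_gpus(files, gpu_ids=[0, 1]):
    print(f"GPU {gpu_id}: {chunk[0]} .. {chunk[-1]} ({len(chunk)} files)")
# GPU 0: image_001.mrc .. image_050.mrc (50 files)
# GPU 1: image_051.mrc .. image_100.mrc (50 files)
```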

{lattice_sub-1.1.3 → lattice_sub-1.2.2}/pyproject.toml +1 -1

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "lattice-sub"
-version = "1.1.3"
+version = "1.2.2"
 description = "Lattice subtraction for cryo-EM micrographs - removes periodic crystal signals to reveal non-periodic features"
 readme = "README.md"
 license = {text = "MIT"}

{lattice_sub-1.1.3 → lattice_sub-1.2.2/src/lattice_sub.egg-info}/PKG-INFO +71 -1

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lattice-sub
-Version: 1.1.3
+Version: 1.2.2
 Summary: Lattice subtraction for cryo-EM micrographs - removes periodic crystal signals to reveal non-periodic features
 Author-email: George Stephenson <george.stephenson@colorado.edu>, Vignesh Kasinath <vignesh.kasinath@colorado.edu>
 License: MIT
@@ -172,6 +172,76 @@ python -c "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_av

 ---

+## Multi-GPU Support
+
+When processing batches on systems with multiple GPUs, files are automatically distributed across all available GPUs for faster processing. No extra flags needed!
+
+```bash
+# Automatically uses all available GPUs
+lattice-sub batch input_folder/ output_folder/ -p 0.56
+```
+
+**Example with 2 GPUs and 100 images:**
+- GPU 0: processes images 1-50
+- GPU 1: processes images 51-100
+- Single progress bar shows combined progress
+
+This provides near-linear speedup with additional GPUs.
+
+---
+
+## HPC Example (CU Boulder Alpine)
+
+Using [Open OnDemand Core Desktop](https://curc.readthedocs.io/en/latest/open_ondemand/core_desktop.html) with 2× RTX 8000 GPUs:
+
+```bash
+# Create environment
+module load anaconda
+conda create -n lattice_test python=3.11 -y
+conda activate lattice_test
+pip install lattice-sub
+
+# Process 100 micrographs
+lattice-sub batch input/ output/ -p 0.56
+```
+
+**Output:**
+```
+Phase-preserving FFT inpainting for cryo-EM | v1.2.2
+
+Configuration
+-------------
+Pixel size: 0.56 A
+Threshold: auto
+Backend: Auto → GPU (Quadro RTX 8000)
+
+Batch Processing
+----------------
+Files: 100
+Output: /projects/user/output
+Workers: 1
+
+✓ Using 2 GPUs: GPU 0, GPU 1
+
+✓ GPU 0: Quadro RTX 8000
+✓ GPU 1: Quadro RTX 8000
+
+Processing: 100%|█████████████████████████| 100/100 [05:12<00:00, 3.13s/file]
+
+[OK] Batch complete (312.9s)
+```
+
+**100 images processed in ~5 minutes** with automatic multi-GPU distribution.
+
+For compute-focused workloads, use Alpine's [GPU partitions](https://curc.readthedocs.io/en/latest/clusters/alpine/alpine-hardware.html) (A100, L40, MI100):
+
+```bash
+# Request 3 GPUs for 1 hour
+sinteractive --partition=aa100 --gres=gpu:3 --ntasks=16 --nodes=1 --time=01:00:00 --qos=normal
+```
+
+---
+
 ## Python API

 ```python

{lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/batch.py +259 -8

@@ -1,12 +1,14 @@
 """
 Batch processing for multiple micrographs.

-This module provides parallel processing capabilities for large datasets
+This module provides parallel processing capabilities for large datasets,
+including automatic multi-GPU support for systems with multiple CUDA devices.
 """

 import os
+import multiprocessing as mp
 from concurrent.futures import ProcessPoolExecutor, as_completed
-from dataclasses import dataclass
+from dataclasses import dataclass, asdict
 from pathlib import Path
 from typing import List, Tuple, Optional, Callable
 import logging
@@ -63,6 +65,91 @@ def _process_single_file(args: tuple) -> Tuple[Path, Optional[str]]:
         return (Path(input_path), str(e))


+def _gpu_worker(
+    gpu_id: int,
+    file_pairs: List[Tuple[str, str]],
+    config_dict: dict,
+    progress_queue: mp.Queue,
+    error_queue: mp.Queue,
+):
+    """
+    Worker function for multi-GPU processing.
+
+    Each worker processes its assigned files on a specific GPU and reports
+    progress through a shared queue.
+
+    Args:
+        gpu_id: CUDA device ID to use
+        file_pairs: List of (input_path, output_path) tuples
+        config_dict: Configuration dictionary
+        progress_queue: Queue to report progress (sends 1 for each completed file)
+        error_queue: Queue to report errors (sends (gpu_id, file_path, error_msg))
+    """
+    import torch
+
+    # Set this process to use the specific GPU
+    torch.cuda.set_device(gpu_id)
+
+    # Reconstruct config with the specific device_id and quiet mode
+    config_dict = config_dict.copy()
+    config_dict['device_id'] = gpu_id
+    config_dict['_quiet'] = True  # Suppress messages - main process handles this
+    config = Config(**config_dict)
+
+    # Create subtractor (messages suppressed via _quiet flag)
+    subtractor = LatticeSubtractor(config)
+
+    for input_path, output_path in file_pairs:
+        try:
+            result = subtractor.process(input_path)
+            result.save(output_path, pixel_size=config.pixel_ang)
+            progress_queue.put(1)
+        except Exception as e:
+            error_queue.put((gpu_id, input_path, str(e)))
+            return  # Fail-fast: exit on first error
+
+
+def _check_gpu_memory(device_id: int, image_shape: Tuple[int, int]) -> Tuple[bool, str]:
+    """
+    Check if GPU has sufficient memory for processing.
+
+    Args:
+        device_id: CUDA device ID
+        image_shape: (height, width) of image
+
+    Returns:
+        (is_ok, message) - True if sufficient memory, False with warning message
+    """
+    try:
+        import torch
+        free_mem, total_mem = torch.cuda.mem_get_info(device_id)
+
+        # Estimate memory needed: image + FFT (complex) + masks + overhead
+        # Roughly 16x image size for safe margin (complex FFT, intermediate buffers)
+        image_bytes = image_shape[0] * image_shape[1] * 4  # float32
+        estimated_need = image_bytes * 16
+
+        if free_mem < estimated_need:
+            return False, (
+                f"GPU {device_id}: {free_mem / 1e9:.1f}GB free, "
+                f"need ~{estimated_need / 1e9:.1f}GB"
+            )
+        return True, ""
+    except Exception as e:
+        return True, ""  # If we can't check, proceed anyway
+
+
+def _get_available_gpus() -> List[int]:
+    """Get list of available CUDA GPU device IDs."""
+    try:
+        import torch
+        if torch.cuda.is_available():
+            return list(range(torch.cuda.device_count()))
+        return []
+    except ImportError:
+        return []
+
+
 class BatchProcessor:
     """
     Parallel batch processor for micrograph datasets.
@@ -157,6 +244,9 @@ class BatchProcessor:
         """
         Process a list of input/output file pairs.

+        Automatically uses multi-GPU processing when multiple GPUs are available.
+        Files are distributed evenly across GPUs in chunks.
+
         Args:
             file_pairs: List of (input_path, output_path) tuples
             show_progress: If True, show progress bar
@@ -168,8 +258,7 @@ class BatchProcessor:
         successful = 0
         failed_files = []

-        # Check if using GPU - if so,
-        # With "auto" backend, check if PyTorch + CUDA is actually available
+        # Check if using GPU - if so, check for multi-GPU capability
         use_gpu = self.config.backend == "pytorch"
         if self.config.backend == "auto":
             try:
@@ -179,10 +268,19 @@ class BatchProcessor:
                 use_gpu = False

         if use_gpu:
-            #
-
-
-            )
+            # Check how many GPUs are available
+            available_gpus = _get_available_gpus()
+
+            if len(available_gpus) > 1 and total > 1:
+                # Multi-GPU processing
+                successful, failed_files = self._process_multi_gpu(
+                    file_pairs, available_gpus, show_progress
+                )
+            else:
+                # Single GPU - sequential processing
+                successful, failed_files = self._process_sequential(
+                    file_pairs, show_progress
+                )
         else:
             # Parallel processing for CPU
             successful, failed_files = self._process_parallel(
@@ -284,6 +382,159 @@ class BatchProcessor:

         return successful, failed_files

+    def _process_multi_gpu(
+        self,
+        file_pairs: List[Tuple[Path, Path]],
+        gpu_ids: List[int],
+        show_progress: bool = True,
+    ) -> Tuple[int, List[Tuple[Path, str]]]:
+        """
+        Process files in parallel across multiple GPUs.
+
+        Files are distributed evenly across GPUs in chunks.
+        Uses spawn-based multiprocessing to avoid CUDA fork issues.
+
+        Args:
+            file_pairs: List of (input_path, output_path) tuples
+            gpu_ids: List of CUDA device IDs to use
+            show_progress: If True, show unified progress bar
+
+        Returns:
+            (successful_count, failed_files_list)
+        """
+        import time
+
+        total = len(file_pairs)
+        num_gpus = len(gpu_ids)
+
+        # Print multi-GPU info with GPU names
+        try:
+            import torch
+            gpu_names = [torch.cuda.get_device_name(i) for i in gpu_ids]
+            print(f"✓ Using {num_gpus} GPUs: {', '.join(f'GPU {i}' for i in gpu_ids)}")
+            print("")
+            for i, name in zip(gpu_ids, gpu_names):
+                print(f" ✓ GPU {i}: {name}")
+        except Exception:
+            print(f"✓ Using {num_gpus} GPUs")
+
+        # Check GPU memory on first GPU (assume similar for all)
+        if file_pairs:
+            try:
+                sample_image = read_mrc(file_pairs[0][0])
+                is_ok, msg = _check_gpu_memory(gpu_ids[0], sample_image.shape)
+                if not is_ok:
+                    print(f"⚠ Memory warning: {msg}")
+            except Exception:
+                pass  # Proceed anyway
+
+        # Distribute files evenly across GPUs (chunked distribution)
+        chunk_size = (total + num_gpus - 1) // num_gpus  # Ceiling division
+        gpu_file_assignments = []
+
+        for i, gpu_id in enumerate(gpu_ids):
+            start_idx = i * chunk_size
+            end_idx = min(start_idx + chunk_size, total)
+            if start_idx < total:
+                chunk = [(str(inp), str(out)) for inp, out in file_pairs[start_idx:end_idx]]
+                gpu_file_assignments.append((gpu_id, chunk))
+
+        # Create shared queues for progress and errors
+        # Use 'spawn' context to avoid CUDA fork issues
+        ctx = mp.get_context('spawn')
+        progress_queue = ctx.Queue()
+        error_queue = ctx.Queue()
+
+        # Create progress bar (after all GPU info printed)
+        if show_progress:
+            print()  # Blank line for visual separation
+            pbar = tqdm(
+                total=total,
+                desc=" Processing",
+                unit="file",
+                ncols=80,
+                leave=True,
+            )
+        else:
+            pbar = None
+
+        # Start worker processes
+        processes = []
+        for gpu_id, file_chunk in gpu_file_assignments:
+            p = ctx.Process(
+                target=_gpu_worker,
+                args=(gpu_id, file_chunk, self._config_dict, progress_queue, error_queue),
+            )
+            p.start()
+            processes.append(p)
+
+        # Monitor progress and check for errors
+        successful = 0
+        failed_files = []
+        completed = 0
+
+        while completed < total:
+            # Check for progress updates (non-blocking with timeout)
+            try:
+                while True:
+                    progress_queue.get(timeout=0.1)
+                    successful += 1
+                    completed += 1
+                    if pbar:
+                        pbar.update(1)
+            except:
+                pass  # Queue empty, continue
+
+            # Check for errors (non-blocking)
+            try:
+                while True:
+                    gpu_id, file_path, error_msg = error_queue.get_nowait()
+                    failed_files.append((Path(file_path), error_msg))
+                    completed += 1
+                    if pbar:
+                        pbar.update(1)
+
+                    # Fail-fast: terminate all workers and report
+                    print(f"\n✗ GPU {gpu_id} failed on {Path(file_path).name}: {error_msg}")
+                    print(f"\nTip: Try a different configuration:")
+                    print(f" lattice-sub batch <input> <output> -p {self.config.pixel_ang} --cpu -j 8")
+
+                    # Terminate all processes
+                    for p in processes:
+                        if p.is_alive():
+                            p.terminate()
+
+                    if pbar:
+                        pbar.close()
+
+                    return successful, failed_files
+            except:
+                pass  # No errors, continue
+
+            # Check if all processes have finished
+            all_done = all(not p.is_alive() for p in processes)
+            if all_done:
+                # Drain remaining queue items
+                try:
+                    while True:
+                        progress_queue.get_nowait()
+                        successful += 1
+                        completed += 1
+                        if pbar:
+                            pbar.update(1)
+                except:
+                    pass
+                break
+
+        # Wait for all processes to finish
+        for p in processes:
+            p.join(timeout=1.0)
+
+        if pbar:
+            pbar.close()
+
+        return successful, failed_files
+
     def process_numbered_sequence(
         self,
         input_pattern: str,
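The `_process_multi_gpu` hunk above combines progress reporting, memory checks, and fail-fast error handling in one method. The snippet below is a distilled, CUDA-free sketch of just the spawn + queue pattern it relies on, with a dummy worker body standing in for the real per-GPU processing; it is not part of the package.

```python
# Minimal sketch of the spawn-context worker/queue pattern (standalone example).
import multiprocessing as mp

def _worker(worker_id, items, progress_queue):
    for item in items:
        # ... real code would process `item` on GPU `worker_id` here ...
        progress_queue.put(1)  # one queue item per finished file

if __name__ == "__main__":
    ctx = mp.get_context("spawn")      # spawn avoids CUDA-after-fork issues
    queue = ctx.Queue()
    chunks = [list(range(0, 50)), list(range(50, 100))]
    procs = [ctx.Process(target=_worker, args=(i, c, queue)) for i, c in enumerate(chunks)]
    for p in procs:
        p.start()
    done = 0
    while done < 100:
        queue.get()                    # parent drains the queue to drive one combined count
        done += 1
    for p in procs:
        p.join()
    print(f"completed {done} items")
```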

{lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/cli.py +4 -1

@@ -122,8 +122,11 @@ def setup_logging(verbose: bool, interactive: bool = False) -> None:
     )


+from . import __version__
+
+
 @click.group()
-@click.version_option(version=
+@click.version_option(version=__version__, prog_name="lattice-sub")
 def main():
     """
     Lattice Subtraction for Cryo-EM Micrographs.
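The cli.py change reads the version from the package itself rather than a literal. The file list above also shows a +1 -1 change to src/lattice_subtraction/__init__.py whose hunk did not survive extraction; presumably it is the matching version bump that `from . import __version__` picks up, roughly:

```python
# src/lattice_subtraction/__init__.py (assumed shape of the +1 -1 change; not shown in the diff body)
__version__ = "1.2.2"
```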

{lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/config.py +7 -0

@@ -64,6 +64,13 @@ class Config:
     # Enabled by default when GPU is available
     use_kornia: bool = True

+    # GPU device ID for multi-GPU support. None = auto-select (GPU 0 for single-GPU mode)
+    # When using multi-GPU batch processing, this is set automatically per worker
+    device_id: Optional[int] = None
+
+    # Internal flag to suppress status messages (used by batch workers)
+    _quiet: bool = False
+
     def __post_init__(self):
         """Validate and set auto-calculated parameters."""
         if self.pixel_ang <= 0:
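Together with the `asdict` import added in batch.py and the `Config(**config_dict)` call in `_gpu_worker`, the new fields are intended to round-trip through a plain dict so each spawn worker can rebuild its own config with a specific `device_id` and `_quiet=True`. A runnable sketch of that flow, using a trimmed stand-in dataclass (`ExampleConfig`) because the real `Config` has more fields than shown in this diff:

```python
from dataclasses import dataclass, asdict
from typing import Optional

@dataclass
class ExampleConfig:                      # stand-in for lattice_subtraction's Config
    pixel_ang: float
    backend: str = "auto"
    device_id: Optional[int] = None       # None = auto-select GPU 0
    _quiet: bool = False                  # suppress status prints in workers

parent_cfg = ExampleConfig(pixel_ang=0.56)
cfg_dict = asdict(parent_cfg)             # serializable, safe to pass to spawn workers

for gpu_id in (0, 1):
    worker_dict = dict(cfg_dict, device_id=gpu_id, _quiet=True)
    worker_cfg = ExampleConfig(**worker_dict)
    print(worker_cfg)
```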

{lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/core.py +14 -4

@@ -83,21 +83,31 @@ class LatticeSubtractor:

         Auto mode tries PyTorch+CUDA first, then PyTorch CPU, then NumPy.
         Prints user-friendly status message about which backend is active.
+
+        Uses config.device_id if specified for multi-GPU support.
         """
         backend = self.config.backend
         self._gpu_message_shown = getattr(self, '_gpu_message_shown', False)

+        # Check if quiet mode (suppress messages for batch workers)
+        quiet = getattr(self.config, '_quiet', False)
+        if quiet:
+            self._gpu_message_shown = True
+
+        # Get device ID from config (None means auto-select GPU 0)
+        device_id = self.config.device_id if self.config.device_id is not None else 0
+
         # Auto mode: try GPU first, then CPU
         if backend == "auto":
             try:
                 import torch
                 if torch.cuda.is_available():
-                    self.device = torch.device('cuda')
+                    self.device = torch.device(f'cuda:{device_id}')
                     self.use_gpu = True
                     # Only print once per session (batch processing reuses subtractor)
                     if not self._gpu_message_shown:
-                        gpu_name = torch.cuda.get_device_name(
-                        print(f"✓ Using GPU: {gpu_name}")
+                        gpu_name = torch.cuda.get_device_name(device_id)
+                        print(f"✓ Using GPU {device_id}: {gpu_name}")
                         self._gpu_message_shown = True
                 else:
                     self.device = torch.device('cpu')
@@ -116,7 +126,7 @@ class LatticeSubtractor:
             try:
                 import torch
                 if torch.cuda.is_available():
-                    self.device = torch.device('cuda')
+                    self.device = torch.device(f'cuda:{device_id}')
                     self.use_gpu = True
                 else:
                     import warnings
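A quick way to check the environment that this per-device selection (and the memory check in batch.py) depends on, assuming a PyTorch install with CUDA support; this diagnostic snippet is not part of the package:

```python
import torch

if not torch.cuda.is_available():
    print("No CUDA devices - backend will fall back to CPU")
else:
    for device_id in range(torch.cuda.device_count()):
        free_b, total_b = torch.cuda.mem_get_info(device_id)
        name = torch.cuda.get_device_name(device_id)
        print(f"cuda:{device_id} {name}: {free_b / 1e9:.1f} GB free of {total_b / 1e9:.1f} GB")
```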