lattice-sub 1.1.4.tar.gz → 1.3.0.tar.gz

This diff represents the content of publicly available package versions released to a supported registry, and is provided for informational purposes only; it reflects the changes between the two versions as they appear in that registry.
Files changed (29)
  1. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/MANIFEST.in +1 -0
  2. {lattice_sub-1.1.4/src/lattice_sub.egg-info → lattice_sub-1.3.0}/PKG-INFO +84 -2
  3. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/README.md +83 -1
  4. lattice_sub-1.3.0/docs/images/example_comparison.png +0 -0
  5. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/examples/config.yaml +2 -2
  6. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/pyproject.toml +1 -1
  7. {lattice_sub-1.1.4 → lattice_sub-1.3.0/src/lattice_sub.egg-info}/PKG-INFO +84 -2
  8. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/__init__.py +1 -1
  9. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/batch.py +259 -8
  10. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/cli.py +15 -2
  11. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/config.py +7 -0
  12. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/core.py +14 -4
  13. lattice_sub-1.3.0/src/lattice_subtraction/visualization.py +368 -0
  14. lattice_sub-1.1.4/docs/images/example_comparison.png +0 -0
  15. lattice_sub-1.1.4/src/lattice_subtraction/visualization.py +0 -199
  16. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/LICENSE +0 -0
  17. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/docs/images/threshold_analysis.png +0 -0
  18. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/examples/converted_params.yaml +0 -0
  19. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/setup.cfg +0 -0
  20. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_sub.egg-info/SOURCES.txt +0 -0
  21. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_sub.egg-info/dependency_links.txt +0 -0
  22. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_sub.egg-info/entry_points.txt +0 -0
  23. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_sub.egg-info/requires.txt +0 -0
  24. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_sub.egg-info/top_level.txt +0 -0
  25. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/io.py +0 -0
  26. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/masks.py +0 -0
  27. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/processing.py +0 -0
  28. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/threshold_optimizer.py +0 -0
  29. {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/ui.py +0 -0
MANIFEST.in

@@ -16,6 +16,7 @@ prune tests
  # Exclude internal/development files
  exclude NOTES_*.md
  exclude *.log
+ exclude environment.yml
  exclude benchmark_*.py
  exclude analyze_thresholds.py
  exclude test_search_strategies.py
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lattice-sub
- Version: 1.1.4
+ Version: 1.3.0
  Summary: Lattice subtraction for cryo-EM micrographs - removes periodic crystal signals to reveal non-periodic features
  Author-email: George Stephenson <george.stephenson@colorado.edu>, Vignesh Kasinath <vignesh.kasinath@colorado.edu>
  License: MIT
@@ -94,7 +94,17 @@ lattice-sub batch input_folder/ output_folder/ --pixel-size 0.56
  lattice-sub batch input_folder/ output_folder/ --pixel-size 0.56 --vis comparisons/
  ```

- This creates side-by-side PNG images showing before/after/difference for each micrograph.
+ This creates 4-panel PNG comparison images for each micrograph showing:
+ 1. **Original** - Input micrograph
+ 2. **Subtracted** - Lattice-removed result
+ 3. **Difference** - What was removed (5x amplified)
+ 4. **Threshold Curve** - Threshold vs lattice removal efficacy
+
+ **Limit the number of visualizations:**
+ ```bash
+ # Generate visualizations for first 10 images only
+ lattice-sub batch input_folder/ output_folder/ -p 0.56 --vis comparisons/ -n 10
+ ```

  ---
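The panels above are produced by the new `visualization.py` (+368 lines, not shown in this diff). As a rough illustration only, not the package's actual code, a 4-panel figure of this kind can be assembled with matplotlib; the function name and array inputs here are hypothetical, assuming the original and subtracted micrographs and the threshold sweep are already in hand:

```python
# Illustrative sketch only; save_comparison and its inputs are hypothetical.
import numpy as np
import matplotlib.pyplot as plt

def save_comparison(original: np.ndarray, subtracted: np.ndarray,
                    thresholds: np.ndarray, efficacy: np.ndarray,
                    out_path: str) -> None:
    """Write a 4-panel PNG: original, subtracted, 5x difference, threshold curve."""
    diff = (original - subtracted) * 5  # amplify what was removed, per the README
    fig, axes = plt.subplots(1, 4, figsize=(20, 5))
    panels = [(original, "Original"), (subtracted, "Subtracted"), (diff, "Difference (5x)")]
    for ax, (img, title) in zip(axes, panels):
        ax.imshow(img, cmap="gray")
        ax.set_title(title)
        ax.axis("off")
    axes[3].plot(thresholds, efficacy)
    axes[3].set_title("Threshold Curve")
    axes[3].set_xlabel("Threshold")
    axes[3].set_ylabel("Lattice removal efficacy")
    fig.savefig(out_path, dpi=150, bbox_inches="tight")
    plt.close(fig)
```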

@@ -105,6 +115,8 @@ This creates side-by-side PNG images showing before/after/difference for each mi
  | `-p, --pixel-size` | **Required.** Pixel size in Ångstroms |
  | `-o, --output` | Output file path (default: `sub_<input>`) |
  | `-t, --threshold` | Peak detection sensitivity (default: **auto** - optimized per image) |
+ | `--vis DIR` | Generate 4-panel comparison PNGs in DIR |
+ | `-n, --num-vis N` | Limit visualizations to first N images |
  | `--cpu` | Force CPU processing (GPU is used by default) |
  | `-q, --quiet` | Hide the banner and progress messages |
  | `-v, --verbose` | Show detailed processing information |
@@ -172,6 +184,76 @@ python -c "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_av

  ---

+ ## Multi-GPU Support
+
+ When processing batches on systems with multiple GPUs, files are automatically distributed across all available GPUs for faster processing. No extra flags needed!
+
+ ```bash
+ # Automatically uses all available GPUs
+ lattice-sub batch input_folder/ output_folder/ -p 0.56
+ ```
+
+ **Example with 2 GPUs and 100 images:**
+ - GPU 0: processes images 1-50
+ - GPU 1: processes images 51-100
+ - Single progress bar shows combined progress
+
+ This provides near-linear speedup with additional GPUs.
+
+ ---
+
+ ## HPC Example (CU Boulder Alpine)
+
+ Using [Open OnDemand Core Desktop](https://curc.readthedocs.io/en/latest/open_ondemand/core_desktop.html) with 2× RTX 8000 GPUs:
+
+ ```bash
+ # Create environment
+ module load anaconda
+ conda create -n lattice_test python=3.11 -y
+ conda activate lattice_test
+ pip install lattice-sub
+
+ # Process 100 micrographs
+ lattice-sub batch input/ output/ -p 0.56
+ ```
+
+ **Output:**
+ ```
+ Phase-preserving FFT inpainting for cryo-EM | v1.3.0
+
+ Configuration
+ -------------
+ Pixel size: 0.56 A
+ Threshold: auto
+ Backend: Auto → GPU (Quadro RTX 8000)
+
+ Batch Processing
+ ----------------
+ Files: 100
+ Output: /projects/user/output
+ Workers: 1
+
+ ✓ Using 2 GPUs: GPU 0, GPU 1
+
+ ✓ GPU 0: Quadro RTX 8000
+ ✓ GPU 1: Quadro RTX 8000
+
+ Processing: 100%|█████████████████████████| 100/100 [05:12<00:00, 3.13s/file]
+
+ [OK] Batch complete (312.9s)
+ ```
+
+ **100 images processed in ~5 minutes** with automatic multi-GPU distribution.
+
+ For compute-focused workloads, use Alpine's [GPU partitions](https://curc.readthedocs.io/en/latest/clusters/alpine/alpine-hardware.html) (A100, L40, MI100):
+
+ ```bash
+ # Request 3 GPUs for 1 hour
+ sinteractive --partition=aa100 --gres=gpu:3 --ntasks=16 --nodes=1 --time=01:00:00 --qos=normal
+ ```
+
+ ---
+
  ## Python API

  ```python
README.md

The three documentation hunks are identical in content to the PKG-INFO hunks above (the 4-panel visualization docs, the two new option-table rows, and the Multi-GPU Support / HPC Example sections), at README.md offsets:

@@ -53,7 +53,17 @@ lattice-sub batch input_folder/ output_folder/ --pixel-size 0.56
@@ -64,6 +74,8 @@ This creates side-by-side PNG images showing before/after/difference for each mi
@@ -131,6 +143,76 @@ python -c "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_av

README.md has no metadata header, so there is no version hunk here.
examples/config.yaml

@@ -1,9 +1,9 @@
- # Lattice Subtraction - Example Configuration (v1.1.0)
+ # Lattice Subtraction - Example Configuration
  #
  # This file contains all available configuration options.
  # Copy and modify for your specific dataset.
  #
- # NEW in v1.1.0: Auto-threshold and Kornia GPU acceleration are now defaults!
+ # Auto-threshold and Kornia GPU acceleration are defaults.
  # Just run `lattice-sub process image.mrc -p 0.56` for optimal results.

  # ============================================
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "lattice-sub"
- version = "1.1.4"
+ version = "1.3.0"
  description = "Lattice subtraction for cryo-EM micrographs - removes periodic crystal signals to reveal non-periodic features"
  readme = "README.md"
  license = {text = "MIT"}
src/lattice_sub.egg-info/PKG-INFO

All four hunks (@@ -1,6 +1,6, @@ -94,7 +94,17, @@ -105,6 +115,8, @@ -172,6 +184,76) are identical to the PKG-INFO diff above; the egg-info copy carries the same version bump and documentation changes.
src/lattice_subtraction/__init__.py

@@ -19,7 +19,7 @@ Example:
  >>> result.save("output.mrc")
  """

- __version__ = "1.1.4"
+ __version__ = "1.3.0"
  __author__ = "George Stephenson & Vignesh Kasinath"

  from .config import Config
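Read together with the calls that appear in batch.py below (`LatticeSubtractor(config)`, `subtractor.process(...)`, `result.save(..., pixel_size=config.pixel_ang)`), the docstring example expands to roughly this workflow. A sketch only: it assumes `Config` accepts `pixel_ang` as a keyword (implied by the `Config(**config_dict)` call) and that `LatticeSubtractor` is exported at package level:

```python
from lattice_subtraction import Config, LatticeSubtractor  # package-level export assumed

# pixel_ang keyword inferred from Config(**config_dict) and config.pixel_ang in batch.py
config = Config(pixel_ang=0.56)
subtractor = LatticeSubtractor(config)

result = subtractor.process("image.mrc")                   # same call _gpu_worker makes
result.save("sub_image.mrc", pixel_size=config.pixel_ang)
```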
src/lattice_subtraction/batch.py

@@ -1,12 +1,14 @@
  """
  Batch processing for multiple micrographs.

- This module provides parallel processing capabilities for large datasets.
+ This module provides parallel processing capabilities for large datasets,
+ including automatic multi-GPU support for systems with multiple CUDA devices.
  """

  import os
+ import multiprocessing as mp
  from concurrent.futures import ProcessPoolExecutor, as_completed
- from dataclasses import dataclass
+ from dataclasses import dataclass, asdict
  from pathlib import Path
  from typing import List, Tuple, Optional, Callable
  import logging
@@ -63,6 +65,91 @@ def _process_single_file(args: tuple) -> Tuple[Path, Optional[str]]:
          return (Path(input_path), str(e))


+ def _gpu_worker(
+     gpu_id: int,
+     file_pairs: List[Tuple[str, str]],
+     config_dict: dict,
+     progress_queue: mp.Queue,
+     error_queue: mp.Queue,
+ ):
+     """
+     Worker function for multi-GPU processing.
+
+     Each worker processes its assigned files on a specific GPU and reports
+     progress through a shared queue.
+
+     Args:
+         gpu_id: CUDA device ID to use
+         file_pairs: List of (input_path, output_path) tuples
+         config_dict: Configuration dictionary
+         progress_queue: Queue to report progress (sends 1 for each completed file)
+         error_queue: Queue to report errors (sends (gpu_id, file_path, error_msg))
+     """
+     import torch
+
+     # Set this process to use the specific GPU
+     torch.cuda.set_device(gpu_id)
+
+     # Reconstruct config with the specific device_id and quiet mode
+     config_dict = config_dict.copy()
+     config_dict['device_id'] = gpu_id
+     config_dict['_quiet'] = True  # Suppress messages - main process handles this
+     config = Config(**config_dict)
+
+     # Create subtractor (messages suppressed via _quiet flag)
+     subtractor = LatticeSubtractor(config)
+
+     for input_path, output_path in file_pairs:
+         try:
+             result = subtractor.process(input_path)
+             result.save(output_path, pixel_size=config.pixel_ang)
+             progress_queue.put(1)
+         except Exception as e:
+             error_queue.put((gpu_id, input_path, str(e)))
+             return  # Fail-fast: exit on first error
+
+
+ def _check_gpu_memory(device_id: int, image_shape: Tuple[int, int]) -> Tuple[bool, str]:
+     """
+     Check if GPU has sufficient memory for processing.
+
+     Args:
+         device_id: CUDA device ID
+         image_shape: (height, width) of image
+
+     Returns:
+         (is_ok, message) - True if sufficient memory, False with warning message
+     """
+     try:
+         import torch
+         free_mem, total_mem = torch.cuda.mem_get_info(device_id)
+
+         # Estimate memory needed: image + FFT (complex) + masks + overhead
+         # Roughly 16x image size for safe margin (complex FFT, intermediate buffers)
+         image_bytes = image_shape[0] * image_shape[1] * 4  # float32
+         estimated_need = image_bytes * 16
+
+         if free_mem < estimated_need:
+             return False, (
+                 f"GPU {device_id}: {free_mem / 1e9:.1f}GB free, "
+                 f"need ~{estimated_need / 1e9:.1f}GB"
+             )
+         return True, ""
+     except Exception as e:
+         return True, ""  # If we can't check, proceed anyway
+
+
+ def _get_available_gpus() -> List[int]:
+     """Get list of available CUDA GPU device IDs."""
+     try:
+         import torch
+         if torch.cuda.is_available():
+             return list(range(torch.cuda.device_count()))
+         return []
+     except ImportError:
+         return []
+
+
  class BatchProcessor:
      """
      Parallel batch processor for micrograph datasets.
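To make the 16x heuristic in `_check_gpu_memory` concrete: for a 4096×4096 float32 micrograph (an illustrative shape, not one taken from the package), the estimate works out to about 1 GB:

```python
# Worked example of the 16x memory estimate above (4096x4096 is an assumed shape)
height, width = 4096, 4096
image_bytes = height * width * 4        # float32 -> 67,108,864 bytes (~64 MiB)
estimated_need = image_bytes * 16       # complex FFT + masks + overhead margin
print(f"need ~{estimated_need / 1e9:.1f}GB")  # prints: need ~1.1GB
```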
@@ -157,6 +244,9 @@ class BatchProcessor:
          """
          Process a list of input/output file pairs.

+         Automatically uses multi-GPU processing when multiple GPUs are available.
+         Files are distributed evenly across GPUs in chunks.
+
          Args:
              file_pairs: List of (input_path, output_path) tuples
              show_progress: If True, show progress bar
@@ -168,8 +258,7 @@ class BatchProcessor:
          successful = 0
          failed_files = []

-         # Check if using GPU - if so, process sequentially to avoid CUDA fork issues
-         # With "auto" backend, check if PyTorch + CUDA is actually available
+         # Check if using GPU - if so, check for multi-GPU capability
          use_gpu = self.config.backend == "pytorch"
          if self.config.backend == "auto":
              try:
@@ -179,10 +268,19 @@ class BatchProcessor:
                  use_gpu = False

          if use_gpu:
-             # Sequential processing for GPU (CUDA doesn't support fork multiprocessing)
-             successful, failed_files = self._process_sequential(
-                 file_pairs, show_progress
-             )
+             # Check how many GPUs are available
+             available_gpus = _get_available_gpus()
+
+             if len(available_gpus) > 1 and total > 1:
+                 # Multi-GPU processing
+                 successful, failed_files = self._process_multi_gpu(
+                     file_pairs, available_gpus, show_progress
+                 )
+             else:
+                 # Single GPU - sequential processing
+                 successful, failed_files = self._process_sequential(
+                     file_pairs, show_progress
+                 )
          else:
              # Parallel processing for CPU
              successful, failed_files = self._process_parallel(
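The chunked distribution mentioned in the docstring above is implemented in `_process_multi_gpu` (next hunk) via ceiling division. Run standalone, the same arithmetic reproduces the 1-50 / 51-100 split described in the README:

```python
# Standalone illustration of the chunk assignment logic (mock file names)
file_pairs = [(f"in_{i:03d}.mrc", f"out_{i:03d}.mrc") for i in range(100)]
gpu_ids = [0, 1]

total, num_gpus = len(file_pairs), len(gpu_ids)
chunk_size = (total + num_gpus - 1) // num_gpus  # ceiling division -> 50

for i, gpu_id in enumerate(gpu_ids):
    start, end = i * chunk_size, min((i + 1) * chunk_size, total)
    if start < total:
        print(f"GPU {gpu_id}: files {start + 1}-{end}")
# GPU 0: files 1-50
# GPU 1: files 51-100
```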
@@ -284,6 +382,159 @@ class BatchProcessor:

          return successful, failed_files

+     def _process_multi_gpu(
+         self,
+         file_pairs: List[Tuple[Path, Path]],
+         gpu_ids: List[int],
+         show_progress: bool = True,
+     ) -> Tuple[int, List[Tuple[Path, str]]]:
+         """
+         Process files in parallel across multiple GPUs.
+
+         Files are distributed evenly across GPUs in chunks.
+         Uses spawn-based multiprocessing to avoid CUDA fork issues.
+
+         Args:
+             file_pairs: List of (input_path, output_path) tuples
+             gpu_ids: List of CUDA device IDs to use
+             show_progress: If True, show unified progress bar
+
+         Returns:
+             (successful_count, failed_files_list)
+         """
+         import time
+
+         total = len(file_pairs)
+         num_gpus = len(gpu_ids)
+
+         # Print multi-GPU info with GPU names
+         try:
+             import torch
+             gpu_names = [torch.cuda.get_device_name(i) for i in gpu_ids]
+             print(f"✓ Using {num_gpus} GPUs: {', '.join(f'GPU {i}' for i in gpu_ids)}")
+             print("")
+             for i, name in zip(gpu_ids, gpu_names):
+                 print(f"  ✓ GPU {i}: {name}")
+         except Exception:
+             print(f"✓ Using {num_gpus} GPUs")
+
+         # Check GPU memory on first GPU (assume similar for all)
+         if file_pairs:
+             try:
+                 sample_image = read_mrc(file_pairs[0][0])
+                 is_ok, msg = _check_gpu_memory(gpu_ids[0], sample_image.shape)
+                 if not is_ok:
+                     print(f"⚠ Memory warning: {msg}")
+             except Exception:
+                 pass  # Proceed anyway
+
+         # Distribute files evenly across GPUs (chunked distribution)
+         chunk_size = (total + num_gpus - 1) // num_gpus  # Ceiling division
+         gpu_file_assignments = []
+
+         for i, gpu_id in enumerate(gpu_ids):
+             start_idx = i * chunk_size
+             end_idx = min(start_idx + chunk_size, total)
+             if start_idx < total:
+                 chunk = [(str(inp), str(out)) for inp, out in file_pairs[start_idx:end_idx]]
+                 gpu_file_assignments.append((gpu_id, chunk))
+
+         # Create shared queues for progress and errors
+         # Use 'spawn' context to avoid CUDA fork issues
+         ctx = mp.get_context('spawn')
+         progress_queue = ctx.Queue()
+         error_queue = ctx.Queue()
+
+         # Create progress bar (after all GPU info printed)
+         if show_progress:
+             print()  # Blank line for visual separation
+             pbar = tqdm(
+                 total=total,
+                 desc=" Processing",
+                 unit="file",
+                 ncols=80,
+                 leave=True,
+             )
+         else:
+             pbar = None
+
+         # Start worker processes
+         processes = []
+         for gpu_id, file_chunk in gpu_file_assignments:
+             p = ctx.Process(
+                 target=_gpu_worker,
+                 args=(gpu_id, file_chunk, self._config_dict, progress_queue, error_queue),
+             )
+             p.start()
+             processes.append(p)
+
+         # Monitor progress and check for errors
+         successful = 0
+         failed_files = []
+         completed = 0
+
+         while completed < total:
+             # Check for progress updates (non-blocking with timeout)
+             try:
+                 while True:
+                     progress_queue.get(timeout=0.1)
+                     successful += 1
+                     completed += 1
+                     if pbar:
+                         pbar.update(1)
+             except:
+                 pass  # Queue empty, continue
+
+             # Check for errors (non-blocking)
+             try:
+                 while True:
+                     gpu_id, file_path, error_msg = error_queue.get_nowait()
+                     failed_files.append((Path(file_path), error_msg))
+                     completed += 1
+                     if pbar:
+                         pbar.update(1)
+
+                     # Fail-fast: terminate all workers and report
+                     print(f"\n✗ GPU {gpu_id} failed on {Path(file_path).name}: {error_msg}")
+                     print(f"\nTip: Try a different configuration:")
+                     print(f"  lattice-sub batch <input> <output> -p {self.config.pixel_ang} --cpu -j 8")
+
+                     # Terminate all processes
+                     for p in processes:
+                         if p.is_alive():
+                             p.terminate()
+
+                     if pbar:
+                         pbar.close()
+
+                     return successful, failed_files
+             except:
+                 pass  # No errors, continue
+
+             # Check if all processes have finished
+             all_done = all(not p.is_alive() for p in processes)
+             if all_done:
+                 # Drain remaining queue items
+                 try:
+                     while True:
+                         progress_queue.get_nowait()
+                         successful += 1
+                         completed += 1
+                         if pbar:
+                             pbar.update(1)
+                 except:
+                     pass
+                 break
+
+         # Wait for all processes to finish
+         for p in processes:
+             p.join(timeout=1.0)
+
+         if pbar:
+             pbar.close()
+
+         return successful, failed_files
+
      def process_numbered_sequence(
          self,
          input_pattern: str,