lattice-sub 1.1.3.tar.gz → 1.2.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/MANIFEST.in +1 -0
- {lattice_sub-1.1.3/src/lattice_sub.egg-info → lattice_sub-1.2.2}/PKG-INFO +71 -1
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/README.md +70 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/pyproject.toml +1 -1
- {lattice_sub-1.1.3 → lattice_sub-1.2.2/src/lattice_sub.egg-info}/PKG-INFO +71 -1
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/__init__.py +1 -1
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/batch.py +259 -8
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/cli.py +4 -1
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/config.py +7 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/core.py +14 -4
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/LICENSE +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/docs/images/example_comparison.png +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/docs/images/threshold_analysis.png +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/examples/config.yaml +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/examples/converted_params.yaml +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/setup.cfg +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_sub.egg-info/SOURCES.txt +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_sub.egg-info/dependency_links.txt +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_sub.egg-info/entry_points.txt +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_sub.egg-info/requires.txt +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_sub.egg-info/top_level.txt +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/io.py +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/masks.py +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/processing.py +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/threshold_optimizer.py +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/ui.py +0 -0
- {lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/visualization.py +0 -0

{lattice_sub-1.1.3/src/lattice_sub.egg-info → lattice_sub-1.2.2}/PKG-INFO +71 -1

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lattice-sub
-Version: 1.1.3
+Version: 1.2.2
 Summary: Lattice subtraction for cryo-EM micrographs - removes periodic crystal signals to reveal non-periodic features
 Author-email: George Stephenson <george.stephenson@colorado.edu>, Vignesh Kasinath <vignesh.kasinath@colorado.edu>
 License: MIT
@@ -172,6 +172,76 @@ python -c "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_av

 ---

+## Multi-GPU Support
+
+When processing batches on systems with multiple GPUs, files are automatically distributed across all available GPUs for faster processing. No extra flags needed!
+
+```bash
+# Automatically uses all available GPUs
+lattice-sub batch input_folder/ output_folder/ -p 0.56
+```
+
+**Example with 2 GPUs and 100 images:**
+- GPU 0: processes images 1-50
+- GPU 1: processes images 51-100
+- Single progress bar shows combined progress
+
+This provides near-linear speedup with additional GPUs.
+
+---
+
+## HPC Example (CU Boulder Alpine)
+
+Using [Open OnDemand Core Desktop](https://curc.readthedocs.io/en/latest/open_ondemand/core_desktop.html) with 2× RTX 8000 GPUs:
+
+```bash
+# Create environment
+module load anaconda
+conda create -n lattice_test python=3.11 -y
+conda activate lattice_test
+pip install lattice-sub
+
+# Process 100 micrographs
+lattice-sub batch input/ output/ -p 0.56
+```
+
+**Output:**
+```
+Phase-preserving FFT inpainting for cryo-EM | v1.2.2
+
+Configuration
+-------------
+Pixel size: 0.56 A
+Threshold: auto
+Backend: Auto → GPU (Quadro RTX 8000)
+
+Batch Processing
+----------------
+Files: 100
+Output: /projects/user/output
+Workers: 1
+
+✓ Using 2 GPUs: GPU 0, GPU 1
+
+✓ GPU 0: Quadro RTX 8000
+✓ GPU 1: Quadro RTX 8000
+
+Processing: 100%|█████████████████████████| 100/100 [05:12<00:00, 3.13s/file]
+
+[OK] Batch complete (312.9s)
+```
+
+**100 images processed in ~5 minutes** with automatic multi-GPU distribution.
+
+For compute-focused workloads, use Alpine's [GPU partitions](https://curc.readthedocs.io/en/latest/clusters/alpine/alpine-hardware.html) (A100, L40, MI100):
+
+```bash
+# Request 3 GPUs for 1 hour
+sinteractive --partition=aa100 --gres=gpu:3 --ntasks=16 --nodes=1 --time=01:00:00 --qos=normal
+```
+
+---
+
 ## Python API

 ```python

{lattice_sub-1.1.3 → lattice_sub-1.2.2}/README.md +70 -0

@@ -131,6 +131,76 @@ python -c "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_av

 ---

+## Multi-GPU Support
+
+When processing batches on systems with multiple GPUs, files are automatically distributed across all available GPUs for faster processing. No extra flags needed!
+
+```bash
+# Automatically uses all available GPUs
+lattice-sub batch input_folder/ output_folder/ -p 0.56
+```
+
+**Example with 2 GPUs and 100 images:**
+- GPU 0: processes images 1-50
+- GPU 1: processes images 51-100
+- Single progress bar shows combined progress
+
+This provides near-linear speedup with additional GPUs.
+
+---
+
+## HPC Example (CU Boulder Alpine)
+
+Using [Open OnDemand Core Desktop](https://curc.readthedocs.io/en/latest/open_ondemand/core_desktop.html) with 2× RTX 8000 GPUs:
+
+```bash
+# Create environment
+module load anaconda
+conda create -n lattice_test python=3.11 -y
+conda activate lattice_test
+pip install lattice-sub
+
+# Process 100 micrographs
+lattice-sub batch input/ output/ -p 0.56
+```
+
+**Output:**
+```
+Phase-preserving FFT inpainting for cryo-EM | v1.2.2
+
+Configuration
+-------------
+Pixel size: 0.56 A
+Threshold: auto
+Backend: Auto → GPU (Quadro RTX 8000)
+
+Batch Processing
+----------------
+Files: 100
+Output: /projects/user/output
+Workers: 1
+
+✓ Using 2 GPUs: GPU 0, GPU 1
+
+✓ GPU 0: Quadro RTX 8000
+✓ GPU 1: Quadro RTX 8000
+
+Processing: 100%|█████████████████████████| 100/100 [05:12<00:00, 3.13s/file]
+
+[OK] Batch complete (312.9s)
+```
+
+**100 images processed in ~5 minutes** with automatic multi-GPU distribution.
+
+For compute-focused workloads, use Alpine's [GPU partitions](https://curc.readthedocs.io/en/latest/clusters/alpine/alpine-hardware.html) (A100, L40, MI100):
+
+```bash
+# Request 3 GPUs for 1 hour
+sinteractive --partition=aa100 --gres=gpu:3 --ntasks=16 --nodes=1 --time=01:00:00 --qos=normal
+```
+
+---
+
 ## Python API

 ```python
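The "GPU 0: images 1-50 / GPU 1: images 51-100" split described in the README section above is plain chunked (ceiling-division) distribution. The sketch below is a standalone illustration only; `split_across_gpus` is a hypothetical helper name, and the package's real logic lives in `BatchProcessor._process_multi_gpu` in the batch.py diff further down.

```python
# Standalone sketch of chunked distribution across GPUs (not part of lattice-sub).

def split_across_gpus(files, gpu_ids):
    """Assign files to GPUs in contiguous chunks using ceiling division."""
    total = len(files)
    num_gpus = len(gpu_ids)
    chunk_size = (total + num_gpus - 1) // num_gpus  # ceiling division
    assignments = []
    for i, gpu_id in enumerate(gpu_ids):
        start, end = i * chunk_size, min((i + 1) * chunk_size, total)
        if start < total:
            assignments.append((gpu_id, files[start:end]))
    return assignments

files = [f"image_{n:03d}.mrc" for n in range(1, 101)]
for gpu_id, chunk in split_across_gpus(files, gpu_ids=[0, 1]):
    print(f"GPU {gpu_id}: {chunk[0]} .. {chunk[-1]} ({len(chunk)} files)")
# GPU 0: image_001.mrc .. image_050.mrc (50 files)
# GPU 1: image_051.mrc .. image_100.mrc (50 files)
```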

{lattice_sub-1.1.3 → lattice_sub-1.2.2}/pyproject.toml +1 -1

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "lattice-sub"
-version = "1.1.3"
+version = "1.2.2"
 description = "Lattice subtraction for cryo-EM micrographs - removes periodic crystal signals to reveal non-periodic features"
 readme = "README.md"
 license = {text = "MIT"}

{lattice_sub-1.1.3 → lattice_sub-1.2.2/src/lattice_sub.egg-info}/PKG-INFO +71 -1

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lattice-sub
-Version: 1.1.3
+Version: 1.2.2
 Summary: Lattice subtraction for cryo-EM micrographs - removes periodic crystal signals to reveal non-periodic features
 Author-email: George Stephenson <george.stephenson@colorado.edu>, Vignesh Kasinath <vignesh.kasinath@colorado.edu>
 License: MIT
@@ -172,6 +172,76 @@ python -c "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_av

 ---

+## Multi-GPU Support
+
+When processing batches on systems with multiple GPUs, files are automatically distributed across all available GPUs for faster processing. No extra flags needed!
+
+```bash
+# Automatically uses all available GPUs
+lattice-sub batch input_folder/ output_folder/ -p 0.56
+```
+
+**Example with 2 GPUs and 100 images:**
+- GPU 0: processes images 1-50
+- GPU 1: processes images 51-100
+- Single progress bar shows combined progress
+
+This provides near-linear speedup with additional GPUs.
+
+---
+
+## HPC Example (CU Boulder Alpine)
+
+Using [Open OnDemand Core Desktop](https://curc.readthedocs.io/en/latest/open_ondemand/core_desktop.html) with 2× RTX 8000 GPUs:
+
+```bash
+# Create environment
+module load anaconda
+conda create -n lattice_test python=3.11 -y
+conda activate lattice_test
+pip install lattice-sub
+
+# Process 100 micrographs
+lattice-sub batch input/ output/ -p 0.56
+```
+
+**Output:**
+```
+Phase-preserving FFT inpainting for cryo-EM | v1.2.2
+
+Configuration
+-------------
+Pixel size: 0.56 A
+Threshold: auto
+Backend: Auto → GPU (Quadro RTX 8000)
+
+Batch Processing
+----------------
+Files: 100
+Output: /projects/user/output
+Workers: 1
+
+✓ Using 2 GPUs: GPU 0, GPU 1
+
+✓ GPU 0: Quadro RTX 8000
+✓ GPU 1: Quadro RTX 8000
+
+Processing: 100%|█████████████████████████| 100/100 [05:12<00:00, 3.13s/file]
+
+[OK] Batch complete (312.9s)
+```
+
+**100 images processed in ~5 minutes** with automatic multi-GPU distribution.
+
+For compute-focused workloads, use Alpine's [GPU partitions](https://curc.readthedocs.io/en/latest/clusters/alpine/alpine-hardware.html) (A100, L40, MI100):
+
+```bash
+# Request 3 GPUs for 1 hour
+sinteractive --partition=aa100 --gres=gpu:3 --ntasks=16 --nodes=1 --time=01:00:00 --qos=normal
+```
+
+---
+
 ## Python API

 ```python

{lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/batch.py +259 -8

@@ -1,12 +1,14 @@
 """
 Batch processing for multiple micrographs.

-This module provides parallel processing capabilities for large datasets
+This module provides parallel processing capabilities for large datasets,
+including automatic multi-GPU support for systems with multiple CUDA devices.
 """

 import os
+import multiprocessing as mp
 from concurrent.futures import ProcessPoolExecutor, as_completed
-from dataclasses import dataclass
+from dataclasses import dataclass, asdict
 from pathlib import Path
 from typing import List, Tuple, Optional, Callable
 import logging
@@ -63,6 +65,91 @@ def _process_single_file(args: tuple) -> Tuple[Path, Optional[str]]:
         return (Path(input_path), str(e))


+def _gpu_worker(
+    gpu_id: int,
+    file_pairs: List[Tuple[str, str]],
+    config_dict: dict,
+    progress_queue: mp.Queue,
+    error_queue: mp.Queue,
+):
+    """
+    Worker function for multi-GPU processing.
+
+    Each worker processes its assigned files on a specific GPU and reports
+    progress through a shared queue.
+
+    Args:
+        gpu_id: CUDA device ID to use
+        file_pairs: List of (input_path, output_path) tuples
+        config_dict: Configuration dictionary
+        progress_queue: Queue to report progress (sends 1 for each completed file)
+        error_queue: Queue to report errors (sends (gpu_id, file_path, error_msg))
+    """
+    import torch
+
+    # Set this process to use the specific GPU
+    torch.cuda.set_device(gpu_id)
+
+    # Reconstruct config with the specific device_id and quiet mode
+    config_dict = config_dict.copy()
+    config_dict['device_id'] = gpu_id
+    config_dict['_quiet'] = True  # Suppress messages - main process handles this
+    config = Config(**config_dict)
+
+    # Create subtractor (messages suppressed via _quiet flag)
+    subtractor = LatticeSubtractor(config)
+
+    for input_path, output_path in file_pairs:
+        try:
+            result = subtractor.process(input_path)
+            result.save(output_path, pixel_size=config.pixel_ang)
+            progress_queue.put(1)
+        except Exception as e:
+            error_queue.put((gpu_id, input_path, str(e)))
+            return  # Fail-fast: exit on first error
+
+
+def _check_gpu_memory(device_id: int, image_shape: Tuple[int, int]) -> Tuple[bool, str]:
+    """
+    Check if GPU has sufficient memory for processing.
+
+    Args:
+        device_id: CUDA device ID
+        image_shape: (height, width) of image
+
+    Returns:
+        (is_ok, message) - True if sufficient memory, False with warning message
+    """
+    try:
+        import torch
+        free_mem, total_mem = torch.cuda.mem_get_info(device_id)
+
+        # Estimate memory needed: image + FFT (complex) + masks + overhead
+        # Roughly 16x image size for safe margin (complex FFT, intermediate buffers)
+        image_bytes = image_shape[0] * image_shape[1] * 4  # float32
+        estimated_need = image_bytes * 16
+
+        if free_mem < estimated_need:
+            return False, (
+                f"GPU {device_id}: {free_mem / 1e9:.1f}GB free, "
+                f"need ~{estimated_need / 1e9:.1f}GB"
+            )
+        return True, ""
+    except Exception as e:
+        return True, ""  # If we can't check, proceed anyway
+
+
+def _get_available_gpus() -> List[int]:
+    """Get list of available CUDA GPU device IDs."""
+    try:
+        import torch
+        if torch.cuda.is_available():
+            return list(range(torch.cuda.device_count()))
+        return []
+    except ImportError:
+        return []
+
+
 class BatchProcessor:
     """
     Parallel batch processor for micrograph datasets.
@@ -157,6 +244,9 @@ class BatchProcessor:
         """
         Process a list of input/output file pairs.

+        Automatically uses multi-GPU processing when multiple GPUs are available.
+        Files are distributed evenly across GPUs in chunks.
+
         Args:
             file_pairs: List of (input_path, output_path) tuples
             show_progress: If True, show progress bar
@@ -168,8 +258,7 @@ class BatchProcessor:
         successful = 0
         failed_files = []

-        # Check if using GPU - if so,
-        # With "auto" backend, check if PyTorch + CUDA is actually available
+        # Check if using GPU - if so, check for multi-GPU capability
         use_gpu = self.config.backend == "pytorch"
         if self.config.backend == "auto":
             try:
@@ -179,10 +268,19 @@ class BatchProcessor:
                 use_gpu = False

         if use_gpu:
-            #
-
-
-            )
+            # Check how many GPUs are available
+            available_gpus = _get_available_gpus()
+
+            if len(available_gpus) > 1 and total > 1:
+                # Multi-GPU processing
+                successful, failed_files = self._process_multi_gpu(
+                    file_pairs, available_gpus, show_progress
+                )
+            else:
+                # Single GPU - sequential processing
+                successful, failed_files = self._process_sequential(
+                    file_pairs, show_progress
+                )
         else:
             # Parallel processing for CPU
             successful, failed_files = self._process_parallel(
@@ -284,6 +382,159 @@ class BatchProcessor:

         return successful, failed_files

+    def _process_multi_gpu(
+        self,
+        file_pairs: List[Tuple[Path, Path]],
+        gpu_ids: List[int],
+        show_progress: bool = True,
+    ) -> Tuple[int, List[Tuple[Path, str]]]:
+        """
+        Process files in parallel across multiple GPUs.
+
+        Files are distributed evenly across GPUs in chunks.
+        Uses spawn-based multiprocessing to avoid CUDA fork issues.
+
+        Args:
+            file_pairs: List of (input_path, output_path) tuples
+            gpu_ids: List of CUDA device IDs to use
+            show_progress: If True, show unified progress bar
+
+        Returns:
+            (successful_count, failed_files_list)
+        """
+        import time
+
+        total = len(file_pairs)
+        num_gpus = len(gpu_ids)
+
+        # Print multi-GPU info with GPU names
+        try:
+            import torch
+            gpu_names = [torch.cuda.get_device_name(i) for i in gpu_ids]
+            print(f"✓ Using {num_gpus} GPUs: {', '.join(f'GPU {i}' for i in gpu_ids)}")
+            print("")
+            for i, name in zip(gpu_ids, gpu_names):
+                print(f" ✓ GPU {i}: {name}")
+        except Exception:
+            print(f"✓ Using {num_gpus} GPUs")
+
+        # Check GPU memory on first GPU (assume similar for all)
+        if file_pairs:
+            try:
+                sample_image = read_mrc(file_pairs[0][0])
+                is_ok, msg = _check_gpu_memory(gpu_ids[0], sample_image.shape)
+                if not is_ok:
+                    print(f"⚠ Memory warning: {msg}")
+            except Exception:
+                pass  # Proceed anyway
+
+        # Distribute files evenly across GPUs (chunked distribution)
+        chunk_size = (total + num_gpus - 1) // num_gpus  # Ceiling division
+        gpu_file_assignments = []
+
+        for i, gpu_id in enumerate(gpu_ids):
+            start_idx = i * chunk_size
+            end_idx = min(start_idx + chunk_size, total)
+            if start_idx < total:
+                chunk = [(str(inp), str(out)) for inp, out in file_pairs[start_idx:end_idx]]
+                gpu_file_assignments.append((gpu_id, chunk))
+
+        # Create shared queues for progress and errors
+        # Use 'spawn' context to avoid CUDA fork issues
+        ctx = mp.get_context('spawn')
+        progress_queue = ctx.Queue()
+        error_queue = ctx.Queue()
+
+        # Create progress bar (after all GPU info printed)
+        if show_progress:
+            print()  # Blank line for visual separation
+            pbar = tqdm(
+                total=total,
+                desc=" Processing",
+                unit="file",
+                ncols=80,
+                leave=True,
+            )
+        else:
+            pbar = None
+
+        # Start worker processes
+        processes = []
+        for gpu_id, file_chunk in gpu_file_assignments:
+            p = ctx.Process(
+                target=_gpu_worker,
+                args=(gpu_id, file_chunk, self._config_dict, progress_queue, error_queue),
+            )
+            p.start()
+            processes.append(p)
+
+        # Monitor progress and check for errors
+        successful = 0
+        failed_files = []
+        completed = 0
+
+        while completed < total:
+            # Check for progress updates (non-blocking with timeout)
+            try:
+                while True:
+                    progress_queue.get(timeout=0.1)
+                    successful += 1
+                    completed += 1
+                    if pbar:
+                        pbar.update(1)
+            except:
+                pass  # Queue empty, continue
+
+            # Check for errors (non-blocking)
+            try:
+                while True:
+                    gpu_id, file_path, error_msg = error_queue.get_nowait()
+                    failed_files.append((Path(file_path), error_msg))
+                    completed += 1
+                    if pbar:
+                        pbar.update(1)
+
+                    # Fail-fast: terminate all workers and report
+                    print(f"\n✗ GPU {gpu_id} failed on {Path(file_path).name}: {error_msg}")
+                    print(f"\nTip: Try a different configuration:")
+                    print(f" lattice-sub batch <input> <output> -p {self.config.pixel_ang} --cpu -j 8")
+
+                    # Terminate all processes
+                    for p in processes:
+                        if p.is_alive():
+                            p.terminate()
+
+                    if pbar:
+                        pbar.close()
+
+                    return successful, failed_files
+            except:
+                pass  # No errors, continue
+
+            # Check if all processes have finished
+            all_done = all(not p.is_alive() for p in processes)
+            if all_done:
+                # Drain remaining queue items
+                try:
+                    while True:
+                        progress_queue.get_nowait()
+                        successful += 1
+                        completed += 1
+                        if pbar:
+                            pbar.update(1)
+                except:
+                    pass
+                break
+
+        # Wait for all processes to finish
+        for p in processes:
+            p.join(timeout=1.0)
+
+        if pbar:
+            pbar.close()
+
+        return successful, failed_files
+
     def process_numbered_sequence(
         self,
         input_pattern: str,
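The `_process_multi_gpu` hunk above combines progress reporting, memory checks, and fail-fast error handling in one method. The snippet below is a distilled, CUDA-free sketch of just the spawn + queue pattern it relies on, with a dummy worker body standing in for the real per-GPU processing; it is not part of the package.

```python
# Minimal sketch of the spawn-context worker/queue pattern (standalone example).
import multiprocessing as mp

def _worker(worker_id, items, progress_queue):
    for item in items:
        # ... real code would process `item` on GPU `worker_id` here ...
        progress_queue.put(1)  # one queue item per finished file

if __name__ == "__main__":
    ctx = mp.get_context("spawn")      # spawn avoids CUDA-after-fork issues
    queue = ctx.Queue()
    chunks = [list(range(0, 50)), list(range(50, 100))]
    procs = [ctx.Process(target=_worker, args=(i, c, queue)) for i, c in enumerate(chunks)]
    for p in procs:
        p.start()
    done = 0
    while done < 100:
        queue.get()                    # parent drains the queue to drive one combined count
        done += 1
    for p in procs:
        p.join()
    print(f"completed {done} items")
```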

{lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/cli.py +4 -1

@@ -122,8 +122,11 @@ def setup_logging(verbose: bool, interactive: bool = False) -> None:
     )


+from . import __version__
+
+
 @click.group()
-@click.version_option(version=
+@click.version_option(version=__version__, prog_name="lattice-sub")
 def main():
     """
     Lattice Subtraction for Cryo-EM Micrographs.
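The cli.py change reads the version from the package itself rather than a literal. The file list above also shows a +1 -1 change to src/lattice_subtraction/__init__.py whose hunk did not survive extraction; presumably it is the matching version bump that `from . import __version__` picks up, roughly:

```python
# src/lattice_subtraction/__init__.py (assumed shape of the +1 -1 change; not shown in the diff body)
__version__ = "1.2.2"
```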

{lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/config.py +7 -0

@@ -64,6 +64,13 @@ class Config:
     # Enabled by default when GPU is available
     use_kornia: bool = True

+    # GPU device ID for multi-GPU support. None = auto-select (GPU 0 for single-GPU mode)
+    # When using multi-GPU batch processing, this is set automatically per worker
+    device_id: Optional[int] = None
+
+    # Internal flag to suppress status messages (used by batch workers)
+    _quiet: bool = False
+
     def __post_init__(self):
         """Validate and set auto-calculated parameters."""
         if self.pixel_ang <= 0:
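Together with the `asdict` import added in batch.py and the `Config(**config_dict)` call in `_gpu_worker`, the new fields are intended to round-trip through a plain dict so each spawn worker can rebuild its own config with a specific `device_id` and `_quiet=True`. A runnable sketch of that flow, using a trimmed stand-in dataclass (`ExampleConfig`) because the real `Config` has more fields than shown in this diff:

```python
from dataclasses import dataclass, asdict
from typing import Optional

@dataclass
class ExampleConfig:                      # stand-in for lattice_subtraction's Config
    pixel_ang: float
    backend: str = "auto"
    device_id: Optional[int] = None       # None = auto-select GPU 0
    _quiet: bool = False                  # suppress status prints in workers

parent_cfg = ExampleConfig(pixel_ang=0.56)
cfg_dict = asdict(parent_cfg)             # serializable, safe to pass to spawn workers

for gpu_id in (0, 1):
    worker_dict = dict(cfg_dict, device_id=gpu_id, _quiet=True)
    worker_cfg = ExampleConfig(**worker_dict)
    print(worker_cfg)
```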

{lattice_sub-1.1.3 → lattice_sub-1.2.2}/src/lattice_subtraction/core.py +14 -4

@@ -83,21 +83,31 @@ class LatticeSubtractor:

         Auto mode tries PyTorch+CUDA first, then PyTorch CPU, then NumPy.
         Prints user-friendly status message about which backend is active.
+
+        Uses config.device_id if specified for multi-GPU support.
         """
         backend = self.config.backend
         self._gpu_message_shown = getattr(self, '_gpu_message_shown', False)

+        # Check if quiet mode (suppress messages for batch workers)
+        quiet = getattr(self.config, '_quiet', False)
+        if quiet:
+            self._gpu_message_shown = True
+
+        # Get device ID from config (None means auto-select GPU 0)
+        device_id = self.config.device_id if self.config.device_id is not None else 0
+
         # Auto mode: try GPU first, then CPU
         if backend == "auto":
             try:
                 import torch
                 if torch.cuda.is_available():
-                    self.device = torch.device('cuda')
+                    self.device = torch.device(f'cuda:{device_id}')
                     self.use_gpu = True
                     # Only print once per session (batch processing reuses subtractor)
                     if not self._gpu_message_shown:
-                        gpu_name = torch.cuda.get_device_name(
-                        print(f"✓ Using GPU: {gpu_name}")
+                        gpu_name = torch.cuda.get_device_name(device_id)
+                        print(f"✓ Using GPU {device_id}: {gpu_name}")
                         self._gpu_message_shown = True
                 else:
                     self.device = torch.device('cpu')
@@ -116,7 +126,7 @@ class LatticeSubtractor:
             try:
                 import torch
                 if torch.cuda.is_available():
-                    self.device = torch.device('cuda')
+                    self.device = torch.device(f'cuda:{device_id}')
                     self.use_gpu = True
                 else:
                     import warnings
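A quick way to check the environment that this per-device selection (and the memory check in batch.py) depends on, assuming a PyTorch install with CUDA support; this diagnostic snippet is not part of the package:

```python
import torch

if not torch.cuda.is_available():
    print("No CUDA devices - backend will fall back to CPU")
else:
    for device_id in range(torch.cuda.device_count()):
        free_b, total_b = torch.cuda.mem_get_info(device_id)
        name = torch.cuda.get_device_name(device_id)
        print(f"cuda:{device_id} {name}: {free_b / 1e9:.1f} GB free of {total_b / 1e9:.1f} GB")
```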