lattice-sub 1.1.4__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/MANIFEST.in +1 -0
- {lattice_sub-1.1.4/src/lattice_sub.egg-info → lattice_sub-1.3.0}/PKG-INFO +84 -2
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/README.md +83 -1
- lattice_sub-1.3.0/docs/images/example_comparison.png +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/examples/config.yaml +2 -2
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/pyproject.toml +1 -1
- {lattice_sub-1.1.4 → lattice_sub-1.3.0/src/lattice_sub.egg-info}/PKG-INFO +84 -2
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/__init__.py +1 -1
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/batch.py +259 -8
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/cli.py +15 -2
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/config.py +7 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/core.py +14 -4
- lattice_sub-1.3.0/src/lattice_subtraction/visualization.py +368 -0
- lattice_sub-1.1.4/docs/images/example_comparison.png +0 -0
- lattice_sub-1.1.4/src/lattice_subtraction/visualization.py +0 -199
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/LICENSE +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/docs/images/threshold_analysis.png +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/examples/converted_params.yaml +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/setup.cfg +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_sub.egg-info/SOURCES.txt +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_sub.egg-info/dependency_links.txt +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_sub.egg-info/entry_points.txt +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_sub.egg-info/requires.txt +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_sub.egg-info/top_level.txt +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/io.py +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/masks.py +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/processing.py +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/threshold_optimizer.py +0 -0
- {lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/ui.py +0 -0

{lattice_sub-1.1.4/src/lattice_sub.egg-info → lattice_sub-1.3.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lattice-sub
-Version: 1.1.4
+Version: 1.3.0
 Summary: Lattice subtraction for cryo-EM micrographs - removes periodic crystal signals to reveal non-periodic features
 Author-email: George Stephenson <george.stephenson@colorado.edu>, Vignesh Kasinath <vignesh.kasinath@colorado.edu>
 License: MIT
@@ -94,7 +94,17 @@ lattice-sub batch input_folder/ output_folder/ --pixel-size 0.56
 lattice-sub batch input_folder/ output_folder/ --pixel-size 0.56 --vis comparisons/
 ```
 
-This creates side-by-side PNG images showing before/after/difference for each micrograph.
+This creates 4-panel PNG comparison images for each micrograph showing:
+1. **Original** - Input micrograph
+2. **Subtracted** - Lattice-removed result
+3. **Difference** - What was removed (5x amplified)
+4. **Threshold Curve** - Threshold vs lattice removal efficacy
+
+**Limit the number of visualizations:**
+```bash
+# Generate visualizations for first 10 images only
+lattice-sub batch input_folder/ output_folder/ -p 0.56 --vis comparisons/ -n 10
+```
 
 ---
 
@@ -105,6 +115,8 @@ This creates side-by-side PNG images showing before/after/difference for each mi
 | `-p, --pixel-size` | **Required.** Pixel size in Ångstroms |
 | `-o, --output` | Output file path (default: `sub_<input>`) |
 | `-t, --threshold` | Peak detection sensitivity (default: **auto** - optimized per image) |
+| `--vis DIR` | Generate 4-panel comparison PNGs in DIR |
+| `-n, --num-vis N` | Limit visualizations to first N images |
 | `--cpu` | Force CPU processing (GPU is used by default) |
 | `-q, --quiet` | Hide the banner and progress messages |
 | `-v, --verbose` | Show detailed processing information |
@@ -172,6 +184,76 @@ python -c "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_av
 
 ---
 
+## Multi-GPU Support
+
+When processing batches on systems with multiple GPUs, files are automatically distributed across all available GPUs for faster processing. No extra flags needed!
+
+```bash
+# Automatically uses all available GPUs
+lattice-sub batch input_folder/ output_folder/ -p 0.56
+```
+
+**Example with 2 GPUs and 100 images:**
+- GPU 0: processes images 1-50
+- GPU 1: processes images 51-100
+- Single progress bar shows combined progress
+
+This provides near-linear speedup with additional GPUs.
+
+---
+
+## HPC Example (CU Boulder Alpine)
+
+Using [Open OnDemand Core Desktop](https://curc.readthedocs.io/en/latest/open_ondemand/core_desktop.html) with 2× RTX 8000 GPUs:
+
+```bash
+# Create environment
+module load anaconda
+conda create -n lattice_test python=3.11 -y
+conda activate lattice_test
+pip install lattice-sub
+
+# Process 100 micrographs
+lattice-sub batch input/ output/ -p 0.56
+```
+
+**Output:**
+```
+Phase-preserving FFT inpainting for cryo-EM | v1.3.0
+
+Configuration
+-------------
+Pixel size: 0.56 A
+Threshold: auto
+Backend: Auto → GPU (Quadro RTX 8000)
+
+Batch Processing
+----------------
+Files: 100
+Output: /projects/user/output
+Workers: 1
+
+✓ Using 2 GPUs: GPU 0, GPU 1
+
+✓ GPU 0: Quadro RTX 8000
+✓ GPU 1: Quadro RTX 8000
+
+Processing: 100%|█████████████████████████| 100/100 [05:12<00:00, 3.13s/file]
+
+[OK] Batch complete (312.9s)
+```
+
+**100 images processed in ~5 minutes** with automatic multi-GPU distribution.
+
+For compute-focused workloads, use Alpine's [GPU partitions](https://curc.readthedocs.io/en/latest/clusters/alpine/alpine-hardware.html) (A100, L40, MI100):
+
+```bash
+# Request 3 GPUs for 1 hour
+sinteractive --partition=aa100 --gres=gpu:3 --ntasks=16 --nodes=1 --time=01:00:00 --qos=normal
+```
+
+---
+
 ## Python API
 
 ```python
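
The even split described under "Multi-GPU Support" above (100 images on 2 GPUs → images 1-50 on GPU 0, images 51-100 on GPU 1) is plain ceiling-division chunking. A minimal standalone sketch (hypothetical helper; it mirrors the `chunk_size` logic added to `batch.py` later in this diff):

```python
from typing import List, Tuple

def split_across_gpus(files: List[str], num_gpus: int) -> List[Tuple[int, List[str]]]:
    """Assign contiguous chunks of files to GPU ids 0..num_gpus-1."""
    chunk_size = -(-len(files) // num_gpus)  # ceiling division
    return [
        (gpu_id, files[gpu_id * chunk_size:(gpu_id + 1) * chunk_size])
        for gpu_id in range(num_gpus)
        if gpu_id * chunk_size < len(files)
    ]

# 100 files on 2 GPUs: GPU 0 gets files[0:50], GPU 1 gets files[50:100]
assignments = split_across_gpus([f"mic_{i:03d}.mrc" for i in range(100)], 2)
assert [len(chunk) for _, chunk in assignments] == [50, 50]
```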

{lattice_sub-1.1.4 → lattice_sub-1.3.0}/README.md

@@ -53,7 +53,17 @@ lattice-sub batch input_folder/ output_folder/ --pixel-size 0.56
 lattice-sub batch input_folder/ output_folder/ --pixel-size 0.56 --vis comparisons/
 ```
 
-This creates side-by-side PNG images showing before/after/difference for each micrograph.
+This creates 4-panel PNG comparison images for each micrograph showing:
+1. **Original** - Input micrograph
+2. **Subtracted** - Lattice-removed result
+3. **Difference** - What was removed (5x amplified)
+4. **Threshold Curve** - Threshold vs lattice removal efficacy
+
+**Limit the number of visualizations:**
+```bash
+# Generate visualizations for first 10 images only
+lattice-sub batch input_folder/ output_folder/ -p 0.56 --vis comparisons/ -n 10
+```
 
 ---
 
@@ -64,6 +74,8 @@ This creates side-by-side PNG images showing before/after/difference for each mi
 | `-p, --pixel-size` | **Required.** Pixel size in Ångstroms |
 | `-o, --output` | Output file path (default: `sub_<input>`) |
 | `-t, --threshold` | Peak detection sensitivity (default: **auto** - optimized per image) |
+| `--vis DIR` | Generate 4-panel comparison PNGs in DIR |
+| `-n, --num-vis N` | Limit visualizations to first N images |
 | `--cpu` | Force CPU processing (GPU is used by default) |
 | `-q, --quiet` | Hide the banner and progress messages |
 | `-v, --verbose` | Show detailed processing information |
@@ -131,6 +143,76 @@ python -c "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_av
 
 ---
 
+## Multi-GPU Support
+
+When processing batches on systems with multiple GPUs, files are automatically distributed across all available GPUs for faster processing. No extra flags needed!
+
+```bash
+# Automatically uses all available GPUs
+lattice-sub batch input_folder/ output_folder/ -p 0.56
+```
+
+**Example with 2 GPUs and 100 images:**
+- GPU 0: processes images 1-50
+- GPU 1: processes images 51-100
+- Single progress bar shows combined progress
+
+This provides near-linear speedup with additional GPUs.
+
+---
+
+## HPC Example (CU Boulder Alpine)
+
+Using [Open OnDemand Core Desktop](https://curc.readthedocs.io/en/latest/open_ondemand/core_desktop.html) with 2× RTX 8000 GPUs:
+
+```bash
+# Create environment
+module load anaconda
+conda create -n lattice_test python=3.11 -y
+conda activate lattice_test
+pip install lattice-sub
+
+# Process 100 micrographs
+lattice-sub batch input/ output/ -p 0.56
+```
+
+**Output:**
+```
+Phase-preserving FFT inpainting for cryo-EM | v1.3.0
+
+Configuration
+-------------
+Pixel size: 0.56 A
+Threshold: auto
+Backend: Auto → GPU (Quadro RTX 8000)
+
+Batch Processing
+----------------
+Files: 100
+Output: /projects/user/output
+Workers: 1
+
+✓ Using 2 GPUs: GPU 0, GPU 1
+
+✓ GPU 0: Quadro RTX 8000
+✓ GPU 1: Quadro RTX 8000
+
+Processing: 100%|█████████████████████████| 100/100 [05:12<00:00, 3.13s/file]
+
+[OK] Batch complete (312.9s)
+```
+
+**100 images processed in ~5 minutes** with automatic multi-GPU distribution.
+
+For compute-focused workloads, use Alpine's [GPU partitions](https://curc.readthedocs.io/en/latest/clusters/alpine/alpine-hardware.html) (A100, L40, MI100):
+
+```bash
+# Request 3 GPUs for 1 hour
+sinteractive --partition=aa100 --gres=gpu:3 --ntasks=16 --nodes=1 --time=01:00:00 --qos=normal
+```
+
+---
+
 ## Python API
 
 ```python
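
The new `visualization.py` (+368 lines) does not itself appear in this diff, so the 4-panel layout can only be illustrated, not quoted. A minimal matplotlib sketch of what the README describes (all names here are hypothetical, not the package's API):

```python
import numpy as np
import matplotlib
matplotlib.use("Agg")  # headless rendering for batch jobs
import matplotlib.pyplot as plt

def save_comparison(original: np.ndarray, subtracted: np.ndarray,
                    thresholds: np.ndarray, efficacy: np.ndarray,
                    out_path: str) -> None:
    """Write a 4-panel PNG: original, subtracted, 5x difference, threshold curve."""
    fig, axes = plt.subplots(1, 4, figsize=(16, 4))
    axes[0].imshow(original, cmap="gray")
    axes[0].set_title("Original")
    axes[1].imshow(subtracted, cmap="gray")
    axes[1].set_title("Subtracted")
    axes[2].imshow((original - subtracted) * 5, cmap="gray")  # amplify removed signal
    axes[2].set_title("Difference (5x)")
    axes[3].plot(thresholds, efficacy)
    axes[3].set_title("Threshold Curve")
    for ax in axes[:3]:
        ax.axis("off")
    fig.savefig(out_path, dpi=150, bbox_inches="tight")
    plt.close(fig)
```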

lattice_sub-1.3.0/docs/images/example_comparison.png

Binary file

{lattice_sub-1.1.4 → lattice_sub-1.3.0}/examples/config.yaml

@@ -1,9 +1,9 @@
-# Lattice Subtraction - Example Configuration
+# Lattice Subtraction - Example Configuration
 #
 # This file contains all available configuration options.
 # Copy and modify for your specific dataset.
 #
-#
+# Auto-threshold and Kornia GPU acceleration are defaults.
 # Just run `lattice-sub process image.mrc -p 0.56` for optimal results.
 
 # ============================================

{lattice_sub-1.1.4 → lattice_sub-1.3.0}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "lattice-sub"
-version = "1.1.4"
+version = "1.3.0"
 description = "Lattice subtraction for cryo-EM micrographs - removes periodic crystal signals to reveal non-periodic features"
 readme = "README.md"
 license = {text = "MIT"}

{lattice_sub-1.1.4 → lattice_sub-1.3.0/src/lattice_sub.egg-info}/PKG-INFO

Identical to the PKG-INFO diff above (+84 -2).

{lattice_sub-1.1.4 → lattice_sub-1.3.0}/src/lattice_subtraction/batch.py

@@ -1,12 +1,14 @@
 """
 Batch processing for multiple micrographs.
 
-This module provides parallel processing capabilities for large datasets
+This module provides parallel processing capabilities for large datasets,
+including automatic multi-GPU support for systems with multiple CUDA devices.
 """
 
 import os
+import multiprocessing as mp
 from concurrent.futures import ProcessPoolExecutor, as_completed
-from dataclasses import dataclass
+from dataclasses import dataclass, asdict
 from pathlib import Path
 from typing import List, Tuple, Optional, Callable
 import logging
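
The new `asdict` import pairs with the `Config(**config_dict)` call in `_gpu_worker` below: the parent process flattens its dataclass config into a plain picklable dict before handing it to spawned workers. A minimal round-trip sketch (the field list is abbreviated; only `pixel_ang`, `backend`, and `device_id` are attested in this diff):

```python
from dataclasses import dataclass, asdict

@dataclass
class Config:
    pixel_ang: float
    backend: str = "auto"
    device_id: int = 0

parent_cfg = Config(pixel_ang=0.56)
config_dict = asdict(parent_cfg)    # plain dict: picklable across a spawn boundary
config_dict["device_id"] = 1        # per-worker override, as _gpu_worker does
worker_cfg = Config(**config_dict)  # reconstructed inside the worker process
```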
@@ -63,6 +65,91 @@ def _process_single_file(args: tuple) -> Tuple[Path, Optional[str]]:
         return (Path(input_path), str(e))
 
 
+def _gpu_worker(
+    gpu_id: int,
+    file_pairs: List[Tuple[str, str]],
+    config_dict: dict,
+    progress_queue: mp.Queue,
+    error_queue: mp.Queue,
+):
+    """
+    Worker function for multi-GPU processing.
+
+    Each worker processes its assigned files on a specific GPU and reports
+    progress through a shared queue.
+
+    Args:
+        gpu_id: CUDA device ID to use
+        file_pairs: List of (input_path, output_path) tuples
+        config_dict: Configuration dictionary
+        progress_queue: Queue to report progress (sends 1 for each completed file)
+        error_queue: Queue to report errors (sends (gpu_id, file_path, error_msg))
+    """
+    import torch
+
+    # Set this process to use the specific GPU
+    torch.cuda.set_device(gpu_id)
+
+    # Reconstruct config with the specific device_id and quiet mode
+    config_dict = config_dict.copy()
+    config_dict['device_id'] = gpu_id
+    config_dict['_quiet'] = True  # Suppress messages - main process handles this
+    config = Config(**config_dict)
+
+    # Create subtractor (messages suppressed via _quiet flag)
+    subtractor = LatticeSubtractor(config)
+
+    for input_path, output_path in file_pairs:
+        try:
+            result = subtractor.process(input_path)
+            result.save(output_path, pixel_size=config.pixel_ang)
+            progress_queue.put(1)
+        except Exception as e:
+            error_queue.put((gpu_id, input_path, str(e)))
+            return  # Fail-fast: exit on first error
+
+
+def _check_gpu_memory(device_id: int, image_shape: Tuple[int, int]) -> Tuple[bool, str]:
+    """
+    Check if GPU has sufficient memory for processing.
+
+    Args:
+        device_id: CUDA device ID
+        image_shape: (height, width) of image
+
+    Returns:
+        (is_ok, message) - True if sufficient memory, False with warning message
+    """
+    try:
+        import torch
+        free_mem, total_mem = torch.cuda.mem_get_info(device_id)
+
+        # Estimate memory needed: image + FFT (complex) + masks + overhead
+        # Roughly 16x image size for safe margin (complex FFT, intermediate buffers)
+        image_bytes = image_shape[0] * image_shape[1] * 4  # float32
+        estimated_need = image_bytes * 16
+
+        if free_mem < estimated_need:
+            return False, (
+                f"GPU {device_id}: {free_mem / 1e9:.1f}GB free, "
+                f"need ~{estimated_need / 1e9:.1f}GB"
+            )
+        return True, ""
+    except Exception:
+        return True, ""  # If we can't check, proceed anyway
+
+
+def _get_available_gpus() -> List[int]:
+    """Get list of available CUDA GPU device IDs."""
+    try:
+        import torch
+        if torch.cuda.is_available():
+            return list(range(torch.cuda.device_count()))
+        return []
+    except ImportError:
+        return []
+
+
 class BatchProcessor:
     """
     Parallel batch processor for micrograph datasets.
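
As a worked example of the 16x estimate in `_check_gpu_memory` (the detector dimensions here are an illustrative assumption, not from the diff): a 5760×4092 float32 micrograph occupies about 94 MB, so the check wants roughly 1.5 GB of free GPU memory:

```python
h, w = 5760, 4092                  # assumed micrograph dimensions (e.g. a K3 detector)
image_bytes = h * w * 4            # float32
estimated_need = image_bytes * 16  # complex FFT, masks, intermediate buffers
print(f"image: {image_bytes / 1e6:.0f} MB -> need ~{estimated_need / 1e9:.1f} GB free")
# image: 94 MB -> need ~1.5 GB free
```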
@@ -157,6 +244,9 @@ class BatchProcessor:
         """
         Process a list of input/output file pairs.
 
+        Automatically uses multi-GPU processing when multiple GPUs are available.
+        Files are distributed evenly across GPUs in chunks.
+
         Args:
             file_pairs: List of (input_path, output_path) tuples
             show_progress: If True, show progress bar
@@ -168,8 +258,7 @@ class BatchProcessor:
         successful = 0
         failed_files = []
 
-        # Check if using GPU - if so,
-        # With "auto" backend, check if PyTorch + CUDA is actually available
+        # Check if using GPU - if so, check for multi-GPU capability
         use_gpu = self.config.backend == "pytorch"
         if self.config.backend == "auto":
             try:
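
The context lines above elide how the `auto` backend is resolved; judging from the surviving `try:` and the `use_gpu = False` fallback in the next hunk, it presumably reduces to a CUDA availability probe along these lines (a sketch, not the package's exact code):

```python
use_gpu = config.backend == "pytorch"
if config.backend == "auto":
    try:
        import torch
        use_gpu = torch.cuda.is_available()  # GPU only when CUDA is usable
    except ImportError:
        use_gpu = False                      # no PyTorch: fall back to CPU
```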
@@ -179,10 +268,19 @@ class BatchProcessor:
                 use_gpu = False
 
         if use_gpu:
-            #
-
-
-            )
+            # Check how many GPUs are available
+            available_gpus = _get_available_gpus()
+
+            if len(available_gpus) > 1 and total > 1:
+                # Multi-GPU processing
+                successful, failed_files = self._process_multi_gpu(
+                    file_pairs, available_gpus, show_progress
+                )
+            else:
+                # Single GPU - sequential processing
+                successful, failed_files = self._process_sequential(
+                    file_pairs, show_progress
+                )
         else:
             # Parallel processing for CPU
             successful, failed_files = self._process_parallel(
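
The `_process_multi_gpu` method added in the final hunk below relies on the `spawn` start method, because forking a process after CUDA has been initialized is unsafe. A stripped-down, package-independent sketch of the same queue-and-worker pattern:

```python
import multiprocessing as mp

def worker(device_id: int, items: list, progress_queue: mp.Queue) -> None:
    for item in items:
        # ... process `item` on GPU `device_id` here ...
        progress_queue.put(1)  # one tick per finished item

if __name__ == "__main__":
    ctx = mp.get_context("spawn")  # spawn avoids CUDA-after-fork crashes
    progress = ctx.Queue()
    chunks = [["a.mrc", "b.mrc"], ["c.mrc", "d.mrc"]]  # one chunk per GPU
    procs = [ctx.Process(target=worker, args=(i, chunk, progress))
             for i, chunk in enumerate(chunks)]
    for p in procs:
        p.start()
    for _ in range(sum(len(c) for c in chunks)):
        progress.get()  # blocks until any worker reports one finished item
    for p in procs:
        p.join()
```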
@@ -284,6 +382,159 @@ class BatchProcessor:
 
         return successful, failed_files
 
+    def _process_multi_gpu(
+        self,
+        file_pairs: List[Tuple[Path, Path]],
+        gpu_ids: List[int],
+        show_progress: bool = True,
+    ) -> Tuple[int, List[Tuple[Path, str]]]:
+        """
+        Process files in parallel across multiple GPUs.
+
+        Files are distributed evenly across GPUs in chunks.
+        Uses spawn-based multiprocessing to avoid CUDA fork issues.
+
+        Args:
+            file_pairs: List of (input_path, output_path) tuples
+            gpu_ids: List of CUDA device IDs to use
+            show_progress: If True, show unified progress bar
+
+        Returns:
+            (successful_count, failed_files_list)
+        """
+        import time
+
+        total = len(file_pairs)
+        num_gpus = len(gpu_ids)
+
+        # Print multi-GPU info with GPU names
+        try:
+            import torch
+            gpu_names = [torch.cuda.get_device_name(i) for i in gpu_ids]
+            print(f"✓ Using {num_gpus} GPUs: {', '.join(f'GPU {i}' for i in gpu_ids)}")
+            print("")
+            for i, name in zip(gpu_ids, gpu_names):
+                print(f" ✓ GPU {i}: {name}")
+        except Exception:
+            print(f"✓ Using {num_gpus} GPUs")
+
+        # Check GPU memory on first GPU (assume similar for all)
+        if file_pairs:
+            try:
+                sample_image = read_mrc(file_pairs[0][0])
+                is_ok, msg = _check_gpu_memory(gpu_ids[0], sample_image.shape)
+                if not is_ok:
+                    print(f"⚠ Memory warning: {msg}")
+            except Exception:
+                pass  # Proceed anyway
+
+        # Distribute files evenly across GPUs (chunked distribution)
+        chunk_size = (total + num_gpus - 1) // num_gpus  # Ceiling division
+        gpu_file_assignments = []
+
+        for i, gpu_id in enumerate(gpu_ids):
+            start_idx = i * chunk_size
+            end_idx = min(start_idx + chunk_size, total)
+            if start_idx < total:
+                chunk = [(str(inp), str(out)) for inp, out in file_pairs[start_idx:end_idx]]
+                gpu_file_assignments.append((gpu_id, chunk))
+
+        # Create shared queues for progress and errors
+        # Use 'spawn' context to avoid CUDA fork issues
+        ctx = mp.get_context('spawn')
+        progress_queue = ctx.Queue()
+        error_queue = ctx.Queue()
+
+        # Create progress bar (after all GPU info printed)
+        if show_progress:
+            print()  # Blank line for visual separation
+            pbar = tqdm(
+                total=total,
+                desc=" Processing",
+                unit="file",
+                ncols=80,
+                leave=True,
+            )
+        else:
+            pbar = None
+
+        # Start worker processes
+        processes = []
+        for gpu_id, file_chunk in gpu_file_assignments:
+            p = ctx.Process(
+                target=_gpu_worker,
+                args=(gpu_id, file_chunk, self._config_dict, progress_queue, error_queue),
+            )
+            p.start()
+            processes.append(p)
+
+        # Monitor progress and check for errors
+        successful = 0
+        failed_files = []
+        completed = 0
+
+        while completed < total:
+            # Check for progress updates (non-blocking with timeout)
+            try:
+                while True:
+                    progress_queue.get(timeout=0.1)
+                    successful += 1
+                    completed += 1
+                    if pbar:
+                        pbar.update(1)
+            except Exception:
+                pass  # Queue empty, continue
+
+            # Check for errors (non-blocking)
+            try:
+                while True:
+                    gpu_id, file_path, error_msg = error_queue.get_nowait()
+                    failed_files.append((Path(file_path), error_msg))
+                    completed += 1
+                    if pbar:
+                        pbar.update(1)
+
+                    # Fail-fast: terminate all workers and report
+                    print(f"\n✗ GPU {gpu_id} failed on {Path(file_path).name}: {error_msg}")
+                    print(f"\nTip: Try a different configuration:")
+                    print(f" lattice-sub batch <input> <output> -p {self.config.pixel_ang} --cpu -j 8")
+
+                    # Terminate all processes
+                    for p in processes:
+                        if p.is_alive():
+                            p.terminate()
+
+                    if pbar:
+                        pbar.close()
+
+                    return successful, failed_files
+            except Exception:
+                pass  # No errors, continue
+
+            # Check if all processes have finished
+            all_done = all(not p.is_alive() for p in processes)
+            if all_done:
+                # Drain remaining queue items
+                try:
+                    while True:
+                        progress_queue.get_nowait()
+                        successful += 1
+                        completed += 1
+                        if pbar:
+                            pbar.update(1)
+                except Exception:
+                    pass
+                break
+
+        # Wait for all processes to finish
+        for p in processes:
+            p.join(timeout=1.0)
+
+        if pbar:
+            pbar.close()
+
+        return successful, failed_files
+
     def process_numbered_sequence(
         self,
         input_pattern: str,