octopi 1.0.tar.gz → 1.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of octopi might be problematic.
- octopi-1.1/PKG-INFO +108 -0
- octopi-1.1/README.md +69 -0
- {octopi-1.0 → octopi-1.1}/octopi/entry_points/common.py +5 -5
- {octopi-1.0 → octopi-1.1}/octopi/entry_points/create_slurm_submission.py +15 -7
- {octopi-1.0 → octopi-1.1}/octopi/entry_points/run_extract_mb_picks.py +17 -40
- {octopi-1.0 → octopi-1.1}/octopi/entry_points/run_localize.py +32 -48
- {octopi-1.0 → octopi-1.1}/octopi/extract/localize.py +17 -2
- {octopi-1.0 → octopi-1.1}/octopi/io.py +1 -1
- {octopi-1.0 → octopi-1.1}/octopi/main.py +1 -1
- {octopi-1.0 → octopi-1.1}/octopi/processing/create_targets_from_picks.py +8 -1
- {octopi-1.0 → octopi-1.1}/octopi/processing/downsample.py +6 -10
- {octopi-1.0 → octopi-1.1}/octopi/pytorch/model_search_submitter.py +11 -11
- {octopi-1.0 → octopi-1.1}/octopi/pytorch/segmentation.py +12 -8
- {octopi-1.0 → octopi-1.1}/octopi/pytorch/trainer.py +7 -1
- {octopi-1.0 → octopi-1.1}/pyproject.toml +6 -1
- octopi-1.0/PKG-INFO +0 -209
- octopi-1.0/README.md +0 -173
- {octopi-1.0 → octopi-1.1}/LICENSE +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/__init__.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/datasets/__init__.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/datasets/augment.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/datasets/cached_datset.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/datasets/dataset.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/datasets/generators.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/datasets/mixup.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/datasets/multi_config_generator.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/entry_points/__init__.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/entry_points/run_create_targets.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/entry_points/run_evaluate.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/entry_points/run_extract_midpoint.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/entry_points/run_optuna.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/entry_points/run_segment_predict.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/entry_points/run_train.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/extract/__init__.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/extract/membranebound_extract.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/extract/midpoint_extract.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/losses.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/models/AttentionUnet.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/models/MedNeXt.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/models/ModelTemplate.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/models/SegResNet.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/models/Unet.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/models/UnetPlusPlus.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/models/__init__.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/models/common.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/processing/__init__.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/processing/evaluate.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/processing/importers.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/processing/my_metrics.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/processing/segmentation_from_picks.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/processing/writers.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/pytorch/__init__.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/pytorch/hyper_search.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/pytorch_lightning/__init__.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/pytorch_lightning/optuna_pl_ddp.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/pytorch_lightning/train_pl.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/stopping_criteria.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/submit_slurm.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/utils.py +0 -0
- {octopi-1.0 → octopi-1.1}/octopi/visualization_tools.py +0 -0
octopi-1.1/PKG-INFO
ADDED
@@ -0,0 +1,108 @@
+Metadata-Version: 2.3
+Name: octopi
+Version: 1.1
+Summary: Model architecture exploration for cryoET particle picking
+License: MIT
+Author: Jonathan Schwartz
+Requires-Python: >=3.9,<4.0
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: copick
+Requires-Dist: ipywidgets
+Requires-Dist: kaleido
+Requires-Dist: matplotlib
+Requires-Dist: mlflow (==2.17.0)
+Requires-Dist: monai-weekly (==1.5.dev2448)
+Requires-Dist: mrcfile
+Requires-Dist: multiprocess
+Requires-Dist: nibabel
+Requires-Dist: optuna (==4.0.0)
+Requires-Dist: optuna-integration[botorch,pytorch-lightning]
+Requires-Dist: pandas
+Requires-Dist: plotly
+Requires-Dist: python-dotenv
+Requires-Dist: pytorch-lightning (==2.4.0)
+Requires-Dist: requests (>=2.25.1,<3.0.0)
+Requires-Dist: seaborn
+Requires-Dist: torch-ema
+Requires-Dist: tqdm
+Project-URL: Documentation, https://chanzuckerberg.github.io/octopi/
+Project-URL: Homepage, https://github.com/chanzuckerberg/octopi
+Project-URL: Issues, https://github.com/chanzuckerberg/octopi/issues
+Description-Content-Type: text/markdown
+
+# OCTOPI 🐙🐙🐙
+
+[](https://github.com/chanzuckerberg/octopi/raw/main/LICENSE)
+[](https://pypi.org/project/octopi)
+[](https://www.python.org/)
+
+**O**bject dete**CT**ion **O**f **P**rote**I**ns. A deep learning framework for Cryo-ET 3D particle picking with autonomous model exploration capabilities.
+
+## 🚀 Introduction
+
+octopi addresses a critical bottleneck in cryo-electron tomography (cryo-ET) research: the efficient identification and extraction of proteins within complex cellular environments. As advances in cryo-ET enable the collection of thousands of tomograms, the need for automated, accurate particle picking has become increasingly urgent.
+
+Our deep learning-based pipeline streamlines the training and execution of 3D autoencoder models specifically designed for cryo-ET particle picking. Built on [copick](https://github.com/copick/copick), a storage-agnostic API, octopi seamlessly accesses tomograms and segmentations across local and remote environments.
+
+## 🧩 Core Features
+
+- **3D U-Net Training**: Train and evaluate custom 3D U-Net models for particle segmentation
+- **Automatic Architecture Search**: Explore optimal model configurations using Bayesian optimization via Optuna
+- **Flexible Data Access**: Seamlessly work with tomograms from local storage or remote data portals
+- **HPC Ready**: Built-in support for SLURM-based clusters
+- **Experiment Tracking**: Integrated MLflow support for monitoring training and optimization
+- **Dual Interface**: Use via command-line or Python API
+
+## 🚀 Quick Start
+
+### Installation
+
+```bash
+pip install octopi
+```
+
+### Basic Usage
+
+octopi provides two main command-line interfaces:
+
+```bash
+# Main CLI for training, inference, and data processing
+octopi --help
+```
+
+The main `octopi` command provides subcommands for:
+- Data import and preprocessing
+- Training label preparation
+- Model training and exploration
+- Inference and particle localization
+
+```bash
+# HPC-specific CLI for submitting jobs to SLURM clusters
+octopi-slurm --help
+```
+
+The `octopi-slurm` command provides utilities for:
+- Submitting training jobs to SLURM clusters
+- Managing distributed inference tasks
+- Handling batch processing on HPC systems
+
+## 📚 Documentation
+
+For detailed documentation, tutorials, CLI and API reference, visit our [documentation](https://chanzuckerberg.github.io/octopi/).
+
+## 🤝 Contributing
+
+This project adheres to the Contributor Covenant code of conduct. By participating, you are expected to uphold this code. Please report unacceptable behavior to opensource@chanzuckerberg.com.
+
+## 🔒 Security
+
+If you believe you have found a security issue, please responsibly disclose by contacting us at security@chanzuckerberg.com.
+
+
+
octopi-1.1/README.md
ADDED
@@ -0,0 +1,69 @@
+# OCTOPI 🐙🐙🐙
+
+[](https://github.com/chanzuckerberg/octopi/raw/main/LICENSE)
+[](https://pypi.org/project/octopi)
+[](https://www.python.org/)
+
+**O**bject dete**CT**ion **O**f **P**rote**I**ns. A deep learning framework for Cryo-ET 3D particle picking with autonomous model exploration capabilities.
+
+## 🚀 Introduction
+
+octopi addresses a critical bottleneck in cryo-electron tomography (cryo-ET) research: the efficient identification and extraction of proteins within complex cellular environments. As advances in cryo-ET enable the collection of thousands of tomograms, the need for automated, accurate particle picking has become increasingly urgent.
+
+Our deep learning-based pipeline streamlines the training and execution of 3D autoencoder models specifically designed for cryo-ET particle picking. Built on [copick](https://github.com/copick/copick), a storage-agnostic API, octopi seamlessly accesses tomograms and segmentations across local and remote environments.
+
+## 🧩 Core Features
+
+- **3D U-Net Training**: Train and evaluate custom 3D U-Net models for particle segmentation
+- **Automatic Architecture Search**: Explore optimal model configurations using Bayesian optimization via Optuna
+- **Flexible Data Access**: Seamlessly work with tomograms from local storage or remote data portals
+- **HPC Ready**: Built-in support for SLURM-based clusters
+- **Experiment Tracking**: Integrated MLflow support for monitoring training and optimization
+- **Dual Interface**: Use via command-line or Python API
+
+## 🚀 Quick Start
+
+### Installation
+
+```bash
+pip install octopi
+```
+
+### Basic Usage
+
+octopi provides two main command-line interfaces:
+
+```bash
+# Main CLI for training, inference, and data processing
+octopi --help
+```
+
+The main `octopi` command provides subcommands for:
+- Data import and preprocessing
+- Training label preparation
+- Model training and exploration
+- Inference and particle localization
+
+```bash
+# HPC-specific CLI for submitting jobs to SLURM clusters
+octopi-slurm --help
+```
+
+The `octopi-slurm` command provides utilities for:
+- Submitting training jobs to SLURM clusters
+- Managing distributed inference tasks
+- Handling batch processing on HPC systems
+
+## 📚 Documentation
+
+For detailed documentation, tutorials, CLI and API reference, visit our [documentation](https://chanzuckerberg.github.io/octopi/).
+
+## 🤝 Contributing
+
+This project adheres to the Contributor Covenant code of conduct. By participating, you are expected to uphold this code. Please report unacceptable behavior to opensource@chanzuckerberg.com.
+
+## 🔒 Security
+
+If you believe you have found a security issue, please responsibly disclose by contacting us at security@chanzuckerberg.com.
+
+
{octopi-1.0 → octopi-1.1}/octopi/entry_points/common.py
@@ -8,9 +8,9 @@ def add_model_parameters(parser, octopi = False):
 
     # Add U-Net model parameters
     parser.add_argument("--Nclass", type=int, required=False, default=3, help="Number of prediction classes in the model")
-    parser.add_argument("--channels", type=utils.parse_int_list, required=False, default='32,64,
+    parser.add_argument("--channels", type=utils.parse_int_list, required=False, default='32,64,96,96', help="List of channel sizes")
     parser.add_argument("--strides", type=utils.parse_int_list, required=False, default='2,2,1', help="List of stride sizes")
-    parser.add_argument("--res-units", type=int, required=False, default=
+    parser.add_argument("--res-units", type=int, required=False, default=1, help="Number of residual units in the UNet")
     parser.add_argument("--dim-in", type=int, required=False, default=96, help="Input dimension for the UNet model")
 
 def inference_model_parameters(parser):
@@ -24,7 +24,7 @@ def add_train_parameters(parser, octopi = False):
     """
     Add training parameters to the parser.
     """
-    parser.add_argument("--num-epochs", type=int, required=False, default=
+    parser.add_argument("--num-epochs", type=int, required=False, default=1000, help="Number of training epochs")
     parser.add_argument("--val-interval", type=int, required=False, default=10, help="Interval for validation metric calculations")
     parser.add_argument("--tomo-batch-size", type=int, required=False, default=15, help="Number of tomograms to load per epoch for training")
     parser.add_argument("--best-metric", type=str, default='avg_f1', required=False, help="Metric to Monitor for Determining Best Model. To track fBetaN, use fBetaN with N as the beta-value.")
@@ -32,8 +32,8 @@ def add_train_parameters(parser, octopi = False):
     if not octopi:
         parser.add_argument("--num-tomo-crops", type=int, required=False, default=16, help="Number of tomogram crops to use per patch")
         parser.add_argument("--lr", type=float, required=False, default=1e-3, help="Learning rate for the optimizer")
-        parser.add_argument("--tversky-alpha", type=float, required=False, default=0.
-        parser.add_argument("--model-save-path", required=
+        parser.add_argument("--tversky-alpha", type=float, required=False, default=0.3, help="Alpha parameter for the Tversky loss")
+        parser.add_argument("--model-save-path", required=False, default='results', help="Path to model save directory")
     else:
         parser.add_argument("--num-trials", type=int, default=10, required=False, help="Number of trials for architecture search (default: 10).")

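The restored defaults above flow through `utils.parse_int_list`, which converts comma-separated flag values such as `'32,64,96,96'` into integer lists. The helper itself is not part of this diff, so the following is a minimal sketch of how such a `type` callable typically looks, with a hypothetical `parse_int_list`; note that argparse also applies the `type` callable to string defaults, which is why a plain string default works here.

```python
import argparse

def parse_int_list(value: str) -> list[int]:
    """Hypothetical stand-in for utils.parse_int_list: '32,64,96,96' -> [32, 64, 96, 96]."""
    return [int(v) for v in value.split(",") if v.strip()]

parser = argparse.ArgumentParser()
# argparse runs `type` on string defaults too, so default='32,64,96,96' parses cleanly.
parser.add_argument("--channels", type=parse_int_list, required=False, default="32,64,96,96")
print(parser.parse_args([]).channels)                        # [32, 64, 96, 96]
print(parser.parse_args(["--channels", "16,32"]).channels)   # [16, 32]
```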
{octopi-1.0 → octopi-1.1}/octopi/entry_points/create_slurm_submission.py
@@ -16,19 +16,27 @@ def create_train_script(args):
 
     command = f"""
     octopi train \\
+        {strconfigs} \\
         --model-save-path {args.model_save_path} \\
-        --target-info {args.target_info} \\
-        --voxel-size {args.voxel_size} --tomo-
-        --best-metric {args.best_metric} --num-epochs {args.num_epochs} --val-interval {args.val_interval} \\
+        --target-info {','.join(args.target_info)} \\
+        --voxel-size {args.voxel_size} --tomo-alg {args.tomo_alg} --Nclass {args.Nclass} \\
         --tomo-batch-size {args.tomo_batch_size} --num-tomo-crops {args.num_tomo_crops} \\
-        {
-    """
+        --best-metric {args.best_metric} --num-epochs {args.num_epochs} --val-interval {args.val_interval} \\
+    """
 
     # If a model config is provided, use it to build the model
     if args.model_config is not None:
         command += f" --model-config {args.model_config}"
     else:
-
+        channels = ",".join(map(str, args.channels))
+        strides = ",".join(map(str, args.strides))
+        command += (
+            f" --tversky-alpha {args.tversky_alpha}"
+            f" --channels {channels}"
+            f" --strides {strides}"
+            f" --dim-in {args.dim_in}"
+            f" --res-units {args.res_units}"
+        )
 
     # If Model Weights are provided, use them to initialize the model
     if args.model_weights is not None and args.model_config is not None:
@@ -240,4 +248,4 @@ def download_dataportal_slurm():
     """
     parser_description = "Create a SLURM script for downloading tomograms from the Dataportal"
     args = cli_dataportal_parser(parser_description, add_slurm=True)
-    create_download_dataportal_script(args)
+    create_download_dataportal_script(args)

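For illustration, here is roughly the command string the reworked `create_train_script` assembles. The flag set mirrors the hunk above; the values and the `SimpleNamespace` stand-in are made up:

```python
# Hypothetical argparse namespace; values are illustrative, not from the package.
from types import SimpleNamespace

args = SimpleNamespace(
    model_save_path="results",
    target_info=["remotetargets", "train-octopi", "1"],  # now a list, hence ','.join(...)
    channels=[32, 64, 96, 96],
    strides=[2, 2, 1],
)

command = f"octopi train --model-save-path {args.model_save_path}"
command += f" --target-info {','.join(args.target_info)}"
command += f" --channels {','.join(map(str, args.channels))}"
command += f" --strides {','.join(map(str, args.strides))}"
print(command)
# octopi train --model-save-path results --target-info remotetargets,train-octopi,1
#              --channels 32,64,96,96 --strides 2,2,1
```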
{octopi-1.0 → octopi-1.1}/octopi/entry_points/run_extract_mb_picks.py
@@ -30,46 +30,23 @@ def extract_membrane_bound_picks(
     if n_procs is None:
         n_procs = min(mp.cpu_count(), n_run_ids)
     print(f"Using {n_procs} processes to parallelize across {n_run_ids} run IDs.")
-
-    #
-    with
-            target=extract.process_membrane_bound_extract,
-            args=(run,
-                  voxel_size,
-                  picks_info,
-                  membrane_info,
-                  organelle_info,
-                  save_user_id,
-                  save_session_id,
-                  distance_threshold),
-        )
-        processes.append(p)
-
-    for p in processes:
-        p.start()
-
-    for p in processes:
-        p.join()
-
-    for p in processes:
-        p.close()
-
-        # Update tqdm progress bar
-        pbar.update(len(processes))
+
+    # Run Membrane-Protein Isolation - Main Parallelization Loop
+    with mp.Pool(processes=n_procs) as pool:
+        with tqdm(total=n_run_ids, desc="Membrane-Protein Isolation", unit="run") as pbar:
+            worker_func = lambda run_id: extract.process_membrane_bound_extract(
+                root.get_run(run_id),
+                voxel_size,
+                picks_info,
+                membrane_info,
+                organelle_info,
+                save_user_id,
+                save_session_id,
+                distance_threshold
+            )
+
+            for _ in pool.imap_unordered(worker_func, run_ids, chunksize=1):
+                pbar.update(1)
 
     print('Extraction of Membrane-Bound Proteins Complete!')

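Both this file and `run_localize.py` replace a hand-rolled batch of `mp.Process` objects (with separate start/join/close loops and a per-batch progress update) with a worker pool that streams completions back one at a time, so the progress bar advances per finished run. A minimal, self-contained sketch of the pattern, with a stand-in worker instead of octopi's extract module:

```python
import multiprocess as mp   # dill-based fork of multiprocessing, as imported in these files
from tqdm import tqdm

def process_one(run_id):
    # Stand-in for extract.process_membrane_bound_extract(root.get_run(run_id), ...)
    return run_id

if __name__ == "__main__":
    run_ids = [f"run_{i}" for i in range(8)]
    n_procs = min(mp.cpu_count(), len(run_ids))

    with mp.Pool(processes=n_procs) as pool:
        with tqdm(total=len(run_ids), desc="Membrane-Protein Isolation", unit="run") as pbar:
            # imap_unordered yields results as workers finish, so the bar tracks real progress.
            for _ in pool.imap_unordered(process_one, run_ids, chunksize=1):
                pbar.update(1)
```

Passing a lambda as the worker, as the diff does, only works because `multiprocess` serializes functions with dill; the standard-library `multiprocessing` cannot pickle lambdas.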
{octopi-1.0 → octopi-1.1}/octopi/entry_points/run_localize.py
@@ -5,6 +5,7 @@ import copick, argparse, pprint
 from typing import List, Tuple
 import multiprocess as mp
 from tqdm import tqdm
+import os
 
 def pick_particles(
     copick_config_path: str,
@@ -40,56 +41,39 @@ def pick_particles(
     print(', '.join([f'{obj[0]} (Label: {obj[1]})' for obj in objects]) + '\n')
 
     # Either Specify Input RunIDs or Run on All RunIDs
-    if runIDs:
+    if runIDs:
+        print('Running Localization on the Following RunIDs: ' + ', '.join(runIDs) + '\n')
+        run_ids = runIDs
+    else:
+        run_ids = [run.name for run in root.runs if run.get_voxel_spacing(voxel_size) is not None]
+        skipped_run_ids = [run.name for run in root.runs if run.get_voxel_spacing(voxel_size) is None]
+
+        if skipped_run_ids:
+            print(f"Warning: skipping runs with no voxel spacing {voxel_size}: {skipped_run_ids}")
+
+    # Nprocesses shouldnt exceed computation resource or number of available runs
     n_run_ids = len(run_ids)
+    n_procs = min(mp.mp.cpu_count(), n_procs, n_run_ids)
 
-    #
-    if n_procs is None:
-        n_procs = min(int(mp.cpu_count()//4), n_run_ids)
+    # Run Localization - Main Parallelization Loop
     print(f"Using {n_procs} processes to parallelize across {n_run_ids} run IDs.")
-            target=localize.processs_localization,
-            args=(run,
-                  objects,
-                  seg_info,
-                  method,
-                  voxel_size,
-                  filter_size,
-                  radius_min_scale,
-                  radius_max_scale,
-                  pick_session_id,
-                  pick_user_id),
-        )
-        processes.append(p)
-
-    for p in processes:
-        p.start()
-
-    for p in processes:
-        p.join()
-
-    for p in processes:
-        p.close()
-
-        # Update tqdm progress bar
-        pbar.update(len(processes))
+    with mp.Pool(processes=n_procs) as pool:
+        with tqdm(total=n_run_ids, desc="Localization", unit="run") as pbar:
+            worker_func = lambda run_id: localize.processs_localization(
+                root.get_run(run_id),
+                objects,
+                seg_info,
+                method,
+                voxel_size,
+                filter_size,
+                radius_min_scale,
+                radius_max_scale,
+                pick_session_id,
+                pick_user_id
+            )
+
+            for _ in pool.imap_unordered(worker_func, run_ids, chunksize=1):
+                pbar.update(1)
 
     print('Localization Complete!')
@@ -110,7 +94,7 @@ def localize_parser(parser_description, add_slurm: bool = False):
     localize_group.add_argument("--radius-max-scale", type=float, default=1.0, required=False, help="Maximum radius scale for particles.")
     localize_group.add_argument("--filter-size", type=int, default=10, required=False, help="Filter size for localization.")
     localize_group.add_argument("--pick-objects", type=utils.parse_list, default=None, required=False, help="Specific Objects to Find Picks for.")
-    localize_group.add_argument("--n-procs", type=int, default=
+    localize_group.add_argument("--n-procs", type=int, default=8, required=False, help="Number of CPU processes to parallelize runs across. Defaults to the max number of cores available or available runs.")
 
     output_group = parser.add_argument_group("Output Arguments")
     output_group.add_argument("--pick-session-id", type=str, default='1', required=False, help="Session ID for the particle picks.")

{octopi-1.0 → octopi-1.1}/octopi/extract/localize.py
@@ -9,7 +9,7 @@ from octopi import io
 import scipy.ndimage as ndi
 from tqdm import tqdm
 import numpy as np
-import
+import gc
 
 def processs_localization(run,
                           objects,
@@ -107,7 +107,7 @@ def extract_particle_centroids_via_watershed(
     max_particle_size = (4 / 3) * np.pi * (max_particle_radius ** 3)
 
     # Create a binary mask for the specific segmentation label
-    binary_mask = (segmentation == segmentation_idx).astype(
+    binary_mask = (segmentation == segmentation_idx).astype(np.uint8)
 
     # Skip if the segmentation label is not present
     if np.sum(binary_mask) == 0:
@@ -117,7 +117,12 @@ def extract_particle_centroids_via_watershed(
     # Structuring element for erosion and dilation
     struct_elem = ball(1)
     eroded = binary_erosion(binary_mask, struct_elem)
+    del binary_mask
+    gc.collect()
+
     dilated = binary_dilation(eroded, struct_elem)
+    del eroded
+    gc.collect()
 
     # Distance transform and local maxima detection
     distance = ndi.distance_transform_edt(dilated)
@@ -125,7 +130,14 @@ def extract_particle_centroids_via_watershed(
 
     # Watershed segmentation
     markers, _ = ndi.label(local_max)
+    del local_max
+    markers = markers.astype(np.uint8)
+    gc.collect()
+
     watershed_labels = watershed(-distance, markers, mask=dilated)
+    del distance, markers, dilated
+    watershed_labels = watershed_labels.astype(np.uint8)
+    gc.collect()
 
     # Extract region properties and filter based on particle size
     all_centroids = []
@@ -135,6 +147,9 @@ def extract_particle_centroids_via_watershed(
         # Option 1: Use all centroids
         all_centroids.append(region.centroid)
 
+    del watershed_labels
+    gc.collect()
+
     return all_centroids
 
 def extract_particle_centroids_via_com(

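The surrounding function performs watershed-based centroid extraction for a single label, and each step above materializes another volume-sized array; the new `del`/`gc.collect()` calls retire each full-volume intermediate as soon as the next one is derived. A condensed sketch of the same pipeline, with local-maximum detection simplified to a `maximum_filter` comparison (using the CLI's default `--filter-size` of 10), since the exact detection code sits outside this hunk:

```python
import gc
import numpy as np
import scipy.ndimage as ndi
from skimage.morphology import ball, binary_erosion, binary_dilation
from skimage.segmentation import watershed
from skimage.measure import regionprops

def centroids_for_label(segmentation, label, filter_size=10):
    mask = (segmentation == label).astype(np.uint8)
    if mask.sum() == 0:
        return []
    opened = binary_dilation(binary_erosion(mask, ball(1)), ball(1))  # morphological opening
    del mask; gc.collect()                     # retire the full-volume mask immediately
    distance = ndi.distance_transform_edt(opened)
    local_max = (distance == ndi.maximum_filter(distance, size=filter_size)) & (distance > 0)
    markers, _ = ndi.label(local_max)          # seed one marker per local maximum
    del local_max; gc.collect()
    labels = watershed(-distance, markers, mask=opened)
    del distance, markers, opened; gc.collect()
    return [r.centroid for r in regionprops(labels)]
```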
{octopi-1.0 → octopi-1.1}/octopi/io.py
@@ -137,7 +137,7 @@ def get_segmentation_array(run,
     # No Segmentations Are Available, Result in Error
     if len(seg) == 0:
         # Get all available segmentations with their metadata
-        available_segs = run.get_segmentations(voxel_size=voxel_spacing)
+        available_segs = run.get_segmentations(voxel_size=float(voxel_spacing))
         seg_info = [(s.name, s.user_id, s.session_id) for s in available_segs]
 
         # Format the information for display

{octopi-1.0 → octopi-1.1}/octopi/main.py
@@ -33,7 +33,7 @@ def cli_main():
         "create-targets": (create_targets, "Generate segmentation targets from coordinates."),
         "train": (train_model, "Train a single U-Net model."),
         "model-explore": (model_explore, "Explore model architectures with Optuna / Bayesian Optimization."),
-        "
+        "segment": (inference, "Perform segmentation inference on tomograms."),
         "localize": (localize, "Perform localization of particles in tomograms."),
         "extract-mb-picks": (extract_mb_picks, "Extract MB Picks from tomograms."),
         "evaluate": (evaluate, "Evaluate the performance of a model."),

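The `cli_main` table maps each subcommand name to a `(handler, help)` pair; 1.1 fills in the `segment` entry that appears truncated on the removed side. As a sketch of the dispatch pattern this table implies (the wiring below is hypothetical, not octopi's actual code):

```python
import argparse

def segment(args): print("segment", args)
def localize(args): print("localize", args)

COMMANDS = {
    "segment": (segment, "Perform segmentation inference on tomograms."),
    "localize": (localize, "Perform localization of particles in tomograms."),
}

parser = argparse.ArgumentParser(prog="octopi")
sub = parser.add_subparsers(dest="command", required=True)
for name, (_, help_text) in COMMANDS.items():
    sub.add_parser(name, help=help_text)

args = parser.parse_args(["segment"])
COMMANDS[args.command][0](args)   # look up the handler and dispatch
```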
{octopi-1.0 → octopi-1.1}/octopi/processing/create_targets_from_picks.py
@@ -42,7 +42,11 @@ def generate_targets(
 
     # If runIDs are not provided, load all runs
     if run_ids is None:
-        run_ids = [run.name for run in root.runs]
+        run_ids = [run.name for run in root.runs if run.get_voxel_spacing(voxel_size) is not None]
+        skipped_run_ids = [run.name for run in root.runs if run.get_voxel_spacing(voxel_size) is None]
+
+        if skipped_run_ids:
+            print(f"Warning: skipping runs with no voxel spacing {voxel_size}: {skipped_run_ids}")
 
     # Iterate Over All Runs
     for runID in tqdm(run_ids):
@@ -87,6 +91,9 @@ def generate_targets(
             session_id=train_targets[target_name]["session_id"],
         )
 
+        # Filter out empty picks
+        query = [pick for pick in query if pick.points is not None]
+
         # Add Picks to Target
         for pick in query:
             numPicks += len(pick.points)

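This guard now appears in three files (`run_localize.py`, here, and `pytorch/segmentation.py`): runs lacking the requested voxel spacing are filtered out with a warning before the loop rather than failing inside it. The recurring pattern, extracted as a sketch that assumes only the copick-style `run.get_voxel_spacing(voxel_size)` accessor returning `None` when the spacing is absent:

```python
def runs_with_voxel_spacing(root, voxel_size):
    """Split root.runs by availability of the requested voxel spacing (sketch, not octopi API)."""
    kept, skipped = [], []
    for run in root.runs:
        (kept if run.get_voxel_spacing(voxel_size) is not None else skipped).append(run.name)
    if skipped:
        print(f"Warning: skipping runs with no voxel spacing {voxel_size}: {skipped}")
    return kept
```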
{octopi-1.0 → octopi-1.1}/octopi/processing/downsample.py
@@ -102,11 +102,6 @@ class FourierRescale:
         """
         in_depth, in_height, in_width = volume.shape[-3:]
 
-        # Check if dimensions are odd
-        d_is_odd = in_depth % 2
-        h_is_odd = in_height % 2
-        w_is_odd = in_width % 2
-
         # Calculate new dimensions
         extent_depth = in_depth * self.input_voxel_size[0]
         extent_height = in_height * self.input_voxel_size[1]
@@ -121,9 +116,10 @@ class FourierRescale:
         new_height = new_height - (new_height % 2)
         new_width = new_width - (new_width % 2)
 
-        # Calculate starting points
+        # Calculate starting points - properly centered around DC component
+        # No odd/even correction needed - just center the crop
+        start_d = (in_depth - new_depth) // 2
+        start_h = (in_height - new_height) // 2
+        start_w = (in_width - new_width) // 2
 
-        return start_d, start_h, start_w, new_depth, new_height, new_width
+        return start_d, start_h, start_w, new_depth, new_height, new_width

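The removed odd/even bookkeeping becomes a plain symmetric crop: once the target size is forced even, `start = (in_size - new_size) // 2` centers the retained band of the fftshifted spectrum on the DC component. A worked example with assumed voxel sizes (the `extent_*` lines above suggest the new size comes from the voxel-size ratio):

```python
in_depth, voxel_in, voxel_out = 201, 5.0, 10.0
extent = in_depth * voxel_in                    # physical extent is preserved (1005.0)
new_depth = int(extent / voxel_out)             # 100 samples at the coarser spacing
new_depth -= new_depth % 2                      # force an even crop size
start_d = (in_depth - new_depth) // 2           # 50: symmetric window around DC
stop_d = start_d + new_depth                    # 150
print(start_d, stop_d)                          # the shifted spectrum is cropped to [50, 150)
```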
{octopi-1.0 → octopi-1.1}/octopi/pytorch/model_search_submitter.py
@@ -16,16 +16,16 @@ class ModelSearchSubmit:
                  voxel_size: float,
                  Nclass: int,
                  model_type: str,
-                 trainRunIDs: List[str],
-                 validateRunIDs: List[str],
+                 best_metric: str = 'avg_f1',
+                 num_epochs: int = 1000,
+                 num_trials: int = 100,
+                 data_split: str = 0.8,
+                 random_seed: int = 42,
+                 val_interval: int = 10,
+                 tomo_batch_size: int = 15,
+                 trainRunIDs: List[str] = None,
+                 validateRunIDs: List[str] = None,
+                 mlflow_experiment_name: str = 'explore',
                  ):
         """
         Initialize the ModelSearch class for architecture search with Optuna.
@@ -207,7 +207,7 @@ class ModelSearchSubmit:
         # Run multi-GPU optimization
         study = self.get_optuna_study()
         study.optimize(
-            lambda trial: BayesianModelSearch(self.data_generator, self.model_type).multi_gpu_objective(
+            lambda trial: hyper_search.BayesianModelSearch(self.data_generator, self.model_type).multi_gpu_objective(
                 parent_run, trial,
                 self.num_epochs,
                 best_metric=self.best_metric,

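The second hunk is a namespace fix: `BayesianModelSearch` is now referenced through the `hyper_search` module. The surrounding pattern, building a fresh search object inside a lambda so extra arguments ride along with the `trial`, is standard Optuna usage; a minimal sketch with a toy objective in place of real training:

```python
import optuna

class BayesianModelSearch:
    """Toy stand-in for hyper_search.BayesianModelSearch; the real objective trains a model."""
    def multi_gpu_objective(self, trial, num_epochs):
        lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
        depth = trial.suggest_int("depth", 2, 5)
        return depth / (1.0 + abs(lr - 1e-3) * 1e3)   # pretend validation score

study = optuna.create_study(direction="maximize")
study.optimize(
    # The lambda closes over num_epochs, mirroring how the submitter forwards its settings.
    lambda trial: BayesianModelSearch().multi_gpu_objective(trial, num_epochs=10),
    n_trials=5,
)
print(study.best_params)
```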
{octopi-1.0 → octopi-1.1}/octopi/pytorch/segmentation.py
@@ -193,8 +193,12 @@ class Predictor:
 
         # If runIDs are not provided, load all runs
         if runIDs is None:
-            runIDs = [run.name for run in self.root.runs]
-
+            runIDs = [run.name for run in self.root.runs if run.get_voxel_spacing(voxel_spacing) is not None]
+            skippedRunIDs = [run.name for run in self.root.runs if run.get_voxel_spacing(voxel_spacing) is None]
+
+            if skippedRunIDs:
+                print(f"Warning: skipping runs with no voxel spacing {voxel_spacing}: {skippedRunIDs}")
+
         # Iterate over batches of runIDs
         for i in range(0, len(runIDs), num_tomos_per_batch):
 
@@ -227,9 +231,9 @@ class Predictor:
             lambda x: torch.rot90(x, k=1, dims=(3, 4)),  # 90° rotation
             lambda x: torch.rot90(x, k=2, dims=(3, 4)),  # 180° rotation
             lambda x: torch.rot90(x, k=3, dims=(3, 4)),  # 270° rotation
-            #
-            #
-            #
+            # lambda x: torch.flip(x, dims=(3,)),    # Flip along height (spatial_axis=1)
+            # lambda x: torch.flip(x, dims=(4,)),    # Flip along width (spatial_axis=2)
+            # lambda x: torch.flip(x, dims=(3, 4)),  # Flip along both height and width
         ]
 
         # Define inverse transformations (flip back to original orientation)
@@ -238,9 +242,9 @@ class Predictor:
             lambda x: torch.rot90(x, k=-1, dims=(2, 3)),  # Inverse of 90° (i.e. -90°)
             lambda x: torch.rot90(x, k=-2, dims=(2, 3)),  # Inverse of 180° (i.e. -180°)
             lambda x: torch.rot90(x, k=-3, dims=(2, 3)),  # Inverse of 270° (i.e. -270°)
-            #
-            #
-            #
+            # lambda x: torch.flip(x, dims=(2,)),    # Same as forward
+            # lambda x: torch.flip(x, dims=(3,)),    # Same as forward
+            # lambda x: torch.flip(x, dims=(2, 3)),  # Same as forward
         ]
 
         ###################################################################################################################################################

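These lists implement rotation-only test-time augmentation: each forward transform is paired with an inverse so predictions can be mapped back to the original orientation before merging, and the flip variants remain commented out, now with their definitions written out. One detail to note: the forward transforms rotate on dims `(3, 4)` while the inverses use `(2, 3)`, which only lines up if the model output carries one fewer leading dimension than the input. A self-contained sketch that assumes matching input and output shapes:

```python
import torch

def tta_predict(model, x):
    """Average logits over in-plane rotations (sketch; assumes output shape matches input)."""
    preds = []
    for k in (0, 1, 2, 3):
        y = model(torch.rot90(x, k=k, dims=(3, 4)))      # rotate the input volume in-plane
        preds.append(torch.rot90(y, k=-k, dims=(3, 4)))  # un-rotate the prediction
    return torch.stack(preds).mean(dim=0)

model = torch.nn.Identity()            # toy stand-in for the segmentation network
x = torch.randn(1, 1, 8, 16, 16)       # (B, C, D, H, W)
print(tta_predict(model, x).shape)     # torch.Size([1, 1, 8, 16, 16])
```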
{octopi-1.0 → octopi-1.1}/octopi/pytorch/trainer.py
@@ -101,6 +101,9 @@ class ModelTrainer:
                     device=self.device
                 )
 
+                del val_inputs
+                torch.cuda.empty_cache()
+
                 # Compute the loss for this batch
                 loss = self.loss_function(val_outputs, val_labels)  # Assuming self.loss_function is defined
                 val_loss += loss.item()  # Accumulate the loss
@@ -112,6 +115,9 @@ class ModelTrainer:
                 # Compute metrics
                 self.metrics_function(y_pred=metric_val_outputs, y=metric_val_labels)
 
+                del val_labels, val_outputs, metric_val_outputs, metric_val_labels
+                torch.cuda.empty_cache()
+
                 # # Contains recall, precision, and f1 for each class
                 metric_values = self.metrics_function.aggregate(reduction='mean_batch')
@@ -435,4 +441,4 @@ class ModelTrainer:
             best_metric = 'avg_f1'
 
         return best_metric
-
+

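The trainer edits follow one rule: drop the reference to each validation tensor as soon as the loss or metrics have consumed it, then call `torch.cuda.empty_cache()` so the freed blocks leave PyTorch's caching allocator. `empty_cache()` releases nothing that is still referenced, which is why the `del` must come first. A compact sketch of a validation loop with the same hygiene:

```python
import torch

@torch.no_grad()
def validate(model, loader, loss_fn, device="cuda"):
    model.eval()
    total, n = 0.0, 0
    for inputs, labels in loader:
        outputs = model(inputs.to(device))
        del inputs                      # inputs are no longer needed once outputs exist
        torch.cuda.empty_cache()        # return unreferenced cached blocks to the driver
        total += loss_fn(outputs, labels.to(device)).item()
        n += 1
        del outputs, labels             # drop per-batch tensors before the next iteration
        torch.cuda.empty_cache()
    return total / max(n, 1)
```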
{octopi-1.0 → octopi-1.1}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "octopi"
-version = "1.0"
+version = "1.1"
 description = "Model architecture exploration for cryoET particle picking"
 authors = ["Jonathan Schwartz", "Kevin Zhao"]
 license = "MIT"
@@ -41,3 +41,8 @@ octopi-slurm = "octopi.main:cli_slurm_main"
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
+
+[tool.poetry.urls]
+"Homepage" = "https://github.com/chanzuckerberg/octopi"
+"Documentation" = "https://chanzuckerberg.github.io/octopi/"
+"Issues" = "https://github.com/chanzuckerberg/octopi/issues"

octopi-1.0/PKG-INFO
DELETED
@@ -1,209 +0,0 @@
-Metadata-Version: 2.3
-Name: octopi
-Version: 1.0
-Summary: Model architecture exploration for cryoET particle picking
-License: MIT
-Author: Jonathan Schwartz
-Requires-Python: >=3.9,<4.0
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Requires-Dist: copick
-Requires-Dist: ipywidgets
-Requires-Dist: kaleido
-Requires-Dist: matplotlib
-Requires-Dist: mlflow (==2.17.0)
-Requires-Dist: monai-weekly (==1.5.dev2448)
-Requires-Dist: mrcfile
-Requires-Dist: multiprocess
-Requires-Dist: nibabel
-Requires-Dist: optuna (==4.0.0)
-Requires-Dist: optuna-integration[botorch,pytorch-lightning]
-Requires-Dist: pandas
-Requires-Dist: plotly
-Requires-Dist: python-dotenv
-Requires-Dist: pytorch-lightning (==2.4.0)
-Requires-Dist: requests (>=2.25.1,<3.0.0)
-Requires-Dist: seaborn
-Requires-Dist: torch-ema
-Requires-Dist: tqdm
-Description-Content-Type: text/markdown
-
-# OCTOPI 🐙🐙🐙
-**O**bject dete**CT**ion **O**f **P**rote**I**ns. A deep learning framework for Cryo-ET 3D particle picking with autonomous model exploration capabilities.
-
-## 🚀 Introduction
-
-octopi addresses a critical bottleneck in cryo-electron tomography (cryo-ET) research: the efficient identification and extraction of proteins within complex cellular environments. As advances in cryo-ET enable the collection of thousands of tomograms, the need for automated, accurate particle picking has become increasingly urgent.
-
-Our deep learning-based pipeline streamlines the training and execution of 3D autoencoder models specifically designed for cryo-ET particle picking. Built on [copick](https://github.com/copick/copick), a storage-agnostic API, octopi seamlessly accesses tomograms and segmentations across local and remote environments.
-
-## 🧩 Features
-
-octopi offers a modular, deep learning-driven pipeline for:
-* Training and evaluating custom 3D U-Net models for particle segmentation.
-* Automatically exploring model architectures using Bayesian optimization via Optuna.
-* Performing inference for both semantic segmentation and particle localization.
-
-octopi empowers researchers to navigate the dense, intricate landscapes of cryo-ET datasets with unprecedented precision and efficiency without manual trial and error.
-
-## Getting Started
-### Installation
-
-*Octopi* is available on PyPI.
-```
-pip install octopi
-```
-
-## 📚 Usage
-
-octopi provides a clean, scriptable command-line interface. Run the following command to view all available subcommands:
-```
-octopi --help
-```
-Each subcommand supports its own --help flag for detailed usage. To see practical examples of how to interface directly with the octopi API, explore the notebooks/ folder.
-
-If you're running octopi on an HPC cluster, several SLURM-compatible submission commands are available. You can view them by running:
-```
-octopi-slurm --help
-```
-This provides utilities for submitting training, inference, and localization jobs in SLURM-based environments.
-
-### 📥 Data Import & Preprocessing
-
-To train or run inference with octopi, your tomograms must be organized inside a CoPick project. octopi supports two primary methods for data ingestion, both of which include optional Fourier cropping to reduce resolution and accelerate downstream processing.
-
-If your tomograms are already processed and stored locally in .mrc format (e.g., from Warp, IMOD, or AreTomo), you can import them into a new or existing CoPick project using:
-
-```
-octopi import-mrc-volumes \
-    --input-folder /path/to/mrc/files --config /path/to/config.json \
-    --target-tomo-type denoised --input-voxel-size --output-voxel-size 10
-```
-
-octopi also can process tomograms that are hosted on the data portal. Users can download tomograms onto their own remote machine especially if they would like to downsample the tomograms to a lower resolution for speed and memory. You can download and process the tomograms using:
-```
-octopi download-dataportal \
-    --config /path/to/config.json --datasetID 10445 --overlay-path path/to/saved/zarrs \
-    --input-voxel-size 5 --output-voxel-size 10 \
-    --dataportal-name wbp --target-tomotype wbp
-```
-
-### 📁 Training Labels Preparation
-
-Use `octopi create-targets` to create semantic masks for proteins of interest using annotation metadata. In this example lets generate picks segmentations for dataset 10439 from the CZ cryoET Dataportal (only need to run this step once).
-```
-octopi create-targets \
-    --config config.json \
-    --target apoferritin --target beta-galactosidase,slabpick,1 \
-    --target ribosome,pytom,0 --target virus-like-particle,pytom,0 \
-    --seg-target membrane \
-    --tomo-alg wbp --voxel-size 10 \
-    --target-session-id 1 --target-segmentation-name remotetargets \
-    --target-user-id train-octopi
-```
-
-### 🧠 Training a single 3D U-Net model
-Train a 3D U-Net model on the prepared datasets using the prepared target segmentations. We can use tomograms derived from multiple copick projects.
-```
-octopi train-model \
-    --config experiment,config1.json \
-    --config simulation,config2.json \
-    --voxel-size 10 --tomo-alg wbp --Nclass 8 \
-    --tomo-batch-size 50 --num-epochs 100 --val-interval 10 \
-    --target-info remotetargets,train-octopi,1
-```
-Outputs will include model weights (.pth), logs, and training metrics.
-
-### 🔍 Model exploration with Optuna
-
-octopi🐙 supports automatic neural architecture search using Optuna, enabling efficient discovery of optimal 3D U-Net configurations through Bayesian optimization. This allows users to maximize segmentation accuracy without manual tuning.
-
-To launch a model exploration job:
-```
-octopi model-explore \
-    --config experiment,/mnt/dataportal/ml_challenge/config.json \
-    --config simulation,/mnt/dataportal/synthetic_ml_challenge/config.json \
-    --voxel-size 10 --tomo-alg wbp --Nclass 8 \
-    --model-save-path train_results
-```
-Each trial evaluates a different architecture and logs:
-• Segmentation performance metrics
-• Model weights and configs
-• Training curves and validation loss
-
-🔬 Trials are automatically tracked with MLflow and saved under the specified `--model-save-path`.
-
-#### Optuna Dashboard
-
-To quickly asses the exploration results and observe which trials results the best architectures, Optuna provides a dashboard that summarizes all the information on a dashboard. The instrucutions to access the dashboard are available here - https://optuna-dashboard.readthedocs.io/en/latest/getting-started.html, it is recommended to use either VS-Code extension or CLI.
-
-#### 📊 MLflow experiment tracking
-
-To use CZI cloud MLflow tracker, add a `.env` in the root directory like below. You can get a CZI MLflow access token from [here](https://mlflow.cw.use4-prod.si.czi.technology/api/2.0/mlflow/users/access-token) (note that a new token will be generated everytime you open this site).
-```
-MLFLOW_TRACKING_USERNAME = <Your_CZ_email>
-MLFLOW_TRACKING_PASSWORD = <Your_mlflow_access_token>
-```
-
-octopi supports MLflow for logging and visualizing model training and hyperparameter search results, including:
-• Training loss/validation metrics over time
-• Model hyperparameters and architecture details
-• Trial comparison (e.g., best performing model)
-
-You can use either a local MLflow instance, a remote (HPC) instance, or the CZI cloud server:
-
-#### 🧪 Local MLflow Dashboard
-
-To inspect results locally: `mlflow ui` and open http://localhost:5000 in your browser.
-
-#### 🖥️ HPC Cluster MLflow Access (Remote via SSH tunnel)
-
-If running octopi on a remote cluster (e.g., Biohub Bruno), forward the MLflow port.
-On your local machine:
-`ssh -L 5000:localhost:5000 remote_username@remote_host` (in the case of Bruno the remote would be `login01.czbiohub.org`).
-
-Then on the remote terminal (login node): ` mlflow ui --host 0.0.0.0 --port 5000` to launch the MLFlow dashboard on a local borwser.
-
-#### ☁️ CZI coreweave cluser
-
-For the CZI coreweave cluser, MLflow is already hosted. Go to the CZI [mlflow server](https://mlflow.cw.use4-prod.si.czi.technology/).
-
-🔐 A .env file is required to authenticate (see Getting Started section).
-📁 Be sure to register your project name in MLflow before launching runs.
-
-### 🔮 Segmentation
-Generate segmentation prediction masks for tomograms in a given copick project.
-```
-octopi inference \
-    --config config.json \
-    --seg-info predict,unet,1 \
-    --model-config train_results/best_model_config.yaml \
-    --model-weights train_results/best_model.pth \
-    --voxel-size 10 --tomo-alg wbp --tomo-batch-size 25
-```
-Output masks will be saved to the corresponding copick project under the `seg-info` input.
-
-### 📍 Localization
-Convert the segmentation masks into particle coordinates.
-```
-octopi localize \
-    --config config.json \
-    --pick-session-id 1 --pick-user-id unet \
-    --seg-info predict,unet,1
-```
-
-## Contributing
-
-This project adheres to the Contributor Covenant code of conduct. By participating, you are expected to uphold this code. Please report unacceptable behavior to opensource@chanzuckerberg.com.
-
-## Reporting Security Issues
-
-Please note: If you believe you have found a security issue, please responsibly disclose by contacting us at security@chanzuckerberg.com.
-
-
-
octopi-1.0/README.md
DELETED
@@ -1,173 +0,0 @@
-# OCTOPI 🐙🐙🐙
-**O**bject dete**CT**ion **O**f **P**rote**I**ns. A deep learning framework for Cryo-ET 3D particle picking with autonomous model exploration capabilities.
-
-## 🚀 Introduction
-
-octopi addresses a critical bottleneck in cryo-electron tomography (cryo-ET) research: the efficient identification and extraction of proteins within complex cellular environments. As advances in cryo-ET enable the collection of thousands of tomograms, the need for automated, accurate particle picking has become increasingly urgent.
-
-Our deep learning-based pipeline streamlines the training and execution of 3D autoencoder models specifically designed for cryo-ET particle picking. Built on [copick](https://github.com/copick/copick), a storage-agnostic API, octopi seamlessly accesses tomograms and segmentations across local and remote environments.
-
-## 🧩 Features
-
-octopi offers a modular, deep learning-driven pipeline for:
-* Training and evaluating custom 3D U-Net models for particle segmentation.
-* Automatically exploring model architectures using Bayesian optimization via Optuna.
-* Performing inference for both semantic segmentation and particle localization.
-
-octopi empowers researchers to navigate the dense, intricate landscapes of cryo-ET datasets with unprecedented precision and efficiency without manual trial and error.
-
-## Getting Started
-### Installation
-
-*Octopi* is available on PyPI.
-```
-pip install octopi
-```
-
-## 📚 Usage
-
-octopi provides a clean, scriptable command-line interface. Run the following command to view all available subcommands:
-```
-octopi --help
-```
-Each subcommand supports its own --help flag for detailed usage. To see practical examples of how to interface directly with the octopi API, explore the notebooks/ folder.
-
-If you're running octopi on an HPC cluster, several SLURM-compatible submission commands are available. You can view them by running:
-```
-octopi-slurm --help
-```
-This provides utilities for submitting training, inference, and localization jobs in SLURM-based environments.
-
-### 📥 Data Import & Preprocessing
-
-To train or run inference with octopi, your tomograms must be organized inside a CoPick project. octopi supports two primary methods for data ingestion, both of which include optional Fourier cropping to reduce resolution and accelerate downstream processing.
-
-If your tomograms are already processed and stored locally in .mrc format (e.g., from Warp, IMOD, or AreTomo), you can import them into a new or existing CoPick project using:
-
-```
-octopi import-mrc-volumes \
-    --input-folder /path/to/mrc/files --config /path/to/config.json \
-    --target-tomo-type denoised --input-voxel-size --output-voxel-size 10
-```
-
-octopi also can process tomograms that are hosted on the data portal. Users can download tomograms onto their own remote machine especially if they would like to downsample the tomograms to a lower resolution for speed and memory. You can download and process the tomograms using:
-```
-octopi download-dataportal \
-    --config /path/to/config.json --datasetID 10445 --overlay-path path/to/saved/zarrs \
-    --input-voxel-size 5 --output-voxel-size 10 \
-    --dataportal-name wbp --target-tomotype wbp
-```
-
-### 📁 Training Labels Preparation
-
-Use `octopi create-targets` to create semantic masks for proteins of interest using annotation metadata. In this example lets generate picks segmentations for dataset 10439 from the CZ cryoET Dataportal (only need to run this step once).
-```
-octopi create-targets \
-    --config config.json \
-    --target apoferritin --target beta-galactosidase,slabpick,1 \
-    --target ribosome,pytom,0 --target virus-like-particle,pytom,0 \
-    --seg-target membrane \
-    --tomo-alg wbp --voxel-size 10 \
-    --target-session-id 1 --target-segmentation-name remotetargets \
-    --target-user-id train-octopi
-```
-
-### 🧠 Training a single 3D U-Net model
-Train a 3D U-Net model on the prepared datasets using the prepared target segmentations. We can use tomograms derived from multiple copick projects.
-```
-octopi train-model \
-    --config experiment,config1.json \
-    --config simulation,config2.json \
-    --voxel-size 10 --tomo-alg wbp --Nclass 8 \
-    --tomo-batch-size 50 --num-epochs 100 --val-interval 10 \
-    --target-info remotetargets,train-octopi,1
-```
-Outputs will include model weights (.pth), logs, and training metrics.
-
-### 🔍 Model exploration with Optuna
-
-octopi🐙 supports automatic neural architecture search using Optuna, enabling efficient discovery of optimal 3D U-Net configurations through Bayesian optimization. This allows users to maximize segmentation accuracy without manual tuning.
-
-To launch a model exploration job:
-```
-octopi model-explore \
-    --config experiment,/mnt/dataportal/ml_challenge/config.json \
-    --config simulation,/mnt/dataportal/synthetic_ml_challenge/config.json \
-    --voxel-size 10 --tomo-alg wbp --Nclass 8 \
-    --model-save-path train_results
-```
-Each trial evaluates a different architecture and logs:
-• Segmentation performance metrics
-• Model weights and configs
-• Training curves and validation loss
-
-🔬 Trials are automatically tracked with MLflow and saved under the specified `--model-save-path`.
-
-#### Optuna Dashboard
-
-To quickly asses the exploration results and observe which trials results the best architectures, Optuna provides a dashboard that summarizes all the information on a dashboard. The instrucutions to access the dashboard are available here - https://optuna-dashboard.readthedocs.io/en/latest/getting-started.html, it is recommended to use either VS-Code extension or CLI.
-
-#### 📊 MLflow experiment tracking
-
-To use CZI cloud MLflow tracker, add a `.env` in the root directory like below. You can get a CZI MLflow access token from [here](https://mlflow.cw.use4-prod.si.czi.technology/api/2.0/mlflow/users/access-token) (note that a new token will be generated everytime you open this site).
-```
-MLFLOW_TRACKING_USERNAME = <Your_CZ_email>
-MLFLOW_TRACKING_PASSWORD = <Your_mlflow_access_token>
-```
-
-octopi supports MLflow for logging and visualizing model training and hyperparameter search results, including:
-• Training loss/validation metrics over time
-• Model hyperparameters and architecture details
-• Trial comparison (e.g., best performing model)
-
-You can use either a local MLflow instance, a remote (HPC) instance, or the CZI cloud server:
-
-#### 🧪 Local MLflow Dashboard
-
-To inspect results locally: `mlflow ui` and open http://localhost:5000 in your browser.
-
-#### 🖥️ HPC Cluster MLflow Access (Remote via SSH tunnel)
-
-If running octopi on a remote cluster (e.g., Biohub Bruno), forward the MLflow port.
-On your local machine:
-`ssh -L 5000:localhost:5000 remote_username@remote_host` (in the case of Bruno the remote would be `login01.czbiohub.org`).
-
-Then on the remote terminal (login node): ` mlflow ui --host 0.0.0.0 --port 5000` to launch the MLFlow dashboard on a local borwser.
-
-#### ☁️ CZI coreweave cluser
-
-For the CZI coreweave cluser, MLflow is already hosted. Go to the CZI [mlflow server](https://mlflow.cw.use4-prod.si.czi.technology/).
-
-🔐 A .env file is required to authenticate (see Getting Started section).
-📁 Be sure to register your project name in MLflow before launching runs.
-
-### 🔮 Segmentation
-Generate segmentation prediction masks for tomograms in a given copick project.
-```
-octopi inference \
-    --config config.json \
-    --seg-info predict,unet,1 \
-    --model-config train_results/best_model_config.yaml \
-    --model-weights train_results/best_model.pth \
-    --voxel-size 10 --tomo-alg wbp --tomo-batch-size 25
-```
-Output masks will be saved to the corresponding copick project under the `seg-info` input.
-
-### 📍 Localization
-Convert the segmentation masks into particle coordinates.
-```
-octopi localize \
-    --config config.json \
-    --pick-session-id 1 --pick-user-id unet \
-    --seg-info predict,unet,1
-```
-
-## Contributing
-
-This project adheres to the Contributor Covenant code of conduct. By participating, you are expected to uphold this code. Please report unacceptable behavior to opensource@chanzuckerberg.com.
-
-## Reporting Security Issues
-
-Please note: If you believe you have found a security issue, please responsibly disclose by contacting us at security@chanzuckerberg.com.
-
-