octopi 1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of octopi might be problematic. Click here for more details.

Files changed (58) hide show
  1. octopi-1.0/LICENSE +41 -0
  2. octopi-1.0/PKG-INFO +209 -0
  3. octopi-1.0/README.md +173 -0
  4. octopi-1.0/octopi/__init__.py +0 -0
  5. octopi-1.0/octopi/datasets/__init__.py +0 -0
  6. octopi-1.0/octopi/datasets/augment.py +84 -0
  7. octopi-1.0/octopi/datasets/cached_datset.py +113 -0
  8. octopi-1.0/octopi/datasets/dataset.py +19 -0
  9. octopi-1.0/octopi/datasets/generators.py +429 -0
  10. octopi-1.0/octopi/datasets/mixup.py +49 -0
  11. octopi-1.0/octopi/datasets/multi_config_generator.py +253 -0
  12. octopi-1.0/octopi/entry_points/__init__.py +0 -0
  13. octopi-1.0/octopi/entry_points/common.py +80 -0
  14. octopi-1.0/octopi/entry_points/create_slurm_submission.py +243 -0
  15. octopi-1.0/octopi/entry_points/run_create_targets.py +281 -0
  16. octopi-1.0/octopi/entry_points/run_evaluate.py +65 -0
  17. octopi-1.0/octopi/entry_points/run_extract_mb_picks.py +141 -0
  18. octopi-1.0/octopi/entry_points/run_extract_midpoint.py +143 -0
  19. octopi-1.0/octopi/entry_points/run_localize.py +222 -0
  20. octopi-1.0/octopi/entry_points/run_optuna.py +139 -0
  21. octopi-1.0/octopi/entry_points/run_segment_predict.py +166 -0
  22. octopi-1.0/octopi/entry_points/run_train.py +201 -0
  23. octopi-1.0/octopi/extract/__init__.py +0 -0
  24. octopi-1.0/octopi/extract/localize.py +254 -0
  25. octopi-1.0/octopi/extract/membranebound_extract.py +262 -0
  26. octopi-1.0/octopi/extract/midpoint_extract.py +193 -0
  27. octopi-1.0/octopi/io.py +457 -0
  28. octopi-1.0/octopi/losses.py +86 -0
  29. octopi-1.0/octopi/main.py +101 -0
  30. octopi-1.0/octopi/models/AttentionUnet.py +56 -0
  31. octopi-1.0/octopi/models/MedNeXt.py +111 -0
  32. octopi-1.0/octopi/models/ModelTemplate.py +36 -0
  33. octopi-1.0/octopi/models/SegResNet.py +92 -0
  34. octopi-1.0/octopi/models/Unet.py +59 -0
  35. octopi-1.0/octopi/models/UnetPlusPlus.py +47 -0
  36. octopi-1.0/octopi/models/__init__.py +0 -0
  37. octopi-1.0/octopi/models/common.py +62 -0
  38. octopi-1.0/octopi/processing/__init__.py +0 -0
  39. octopi-1.0/octopi/processing/create_targets_from_picks.py +106 -0
  40. octopi-1.0/octopi/processing/downsample.py +129 -0
  41. octopi-1.0/octopi/processing/evaluate.py +289 -0
  42. octopi-1.0/octopi/processing/importers.py +213 -0
  43. octopi-1.0/octopi/processing/my_metrics.py +26 -0
  44. octopi-1.0/octopi/processing/segmentation_from_picks.py +167 -0
  45. octopi-1.0/octopi/processing/writers.py +102 -0
  46. octopi-1.0/octopi/pytorch/__init__.py +0 -0
  47. octopi-1.0/octopi/pytorch/hyper_search.py +243 -0
  48. octopi-1.0/octopi/pytorch/model_search_submitter.py +290 -0
  49. octopi-1.0/octopi/pytorch/segmentation.py +317 -0
  50. octopi-1.0/octopi/pytorch/trainer.py +438 -0
  51. octopi-1.0/octopi/pytorch_lightning/__init__.py +0 -0
  52. octopi-1.0/octopi/pytorch_lightning/optuna_pl_ddp.py +273 -0
  53. octopi-1.0/octopi/pytorch_lightning/train_pl.py +244 -0
  54. octopi-1.0/octopi/stopping_criteria.py +143 -0
  55. octopi-1.0/octopi/submit_slurm.py +95 -0
  56. octopi-1.0/octopi/utils.py +238 -0
  57. octopi-1.0/octopi/visualization_tools.py +201 -0
  58. octopi-1.0/pyproject.toml +43 -0
octopi-1.0/LICENSE ADDED
@@ -0,0 +1,41 @@
1
+ # Legal
2
+
3
+ ## License for the octopi package
4
+
5
+ This package is licensed under the MIT License:
6
+
7
+ ```
8
+ MIT License
9
+
10
+ Copyright (c) 2025 Chan Zuckerberg Initiative
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+ ```
30
+
31
+ ## License Notice for Dependencies
32
+
33
+ ```
34
+ This repository is licensed under the MIT License; it relies on certain third-party dependencies that are distributed under their own open-source licenses. Specifically:
35
+
36
+ - monai is licensed under the Apache License 2.0.
37
+ - pytorch-lightning is licensed under the Apache License 2.0.
38
+
39
+ All dependencies use permissive open-source licenses that are compatible with this project's MIT License. No GPL or other copyleft licensed dependencies are included.
40
+ For specific licensing information about any dependency, please refer to the respective package documentation or repository.
41
+ ```
octopi-1.0/PKG-INFO ADDED
@@ -0,0 +1,209 @@
1
+ Metadata-Version: 2.3
2
+ Name: octopi
3
+ Version: 1.0
4
+ Summary: Model architecture exploration for cryoET particle picking
5
+ License: MIT
6
+ Author: Jonathan Schwartz
7
+ Requires-Python: >=3.9,<4.0
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Requires-Dist: copick
16
+ Requires-Dist: ipywidgets
17
+ Requires-Dist: kaleido
18
+ Requires-Dist: matplotlib
19
+ Requires-Dist: mlflow (==2.17.0)
20
+ Requires-Dist: monai-weekly (==1.5.dev2448)
21
+ Requires-Dist: mrcfile
22
+ Requires-Dist: multiprocess
23
+ Requires-Dist: nibabel
24
+ Requires-Dist: optuna (==4.0.0)
25
+ Requires-Dist: optuna-integration[botorch,pytorch-lightning]
26
+ Requires-Dist: pandas
27
+ Requires-Dist: plotly
28
+ Requires-Dist: python-dotenv
29
+ Requires-Dist: pytorch-lightning (==2.4.0)
30
+ Requires-Dist: requests (>=2.25.1,<3.0.0)
31
+ Requires-Dist: seaborn
32
+ Requires-Dist: torch-ema
33
+ Requires-Dist: tqdm
34
+ Description-Content-Type: text/markdown
35
+
36
+ # OCTOPI 🐙🐙🐙
37
+ **O**bject dete**CT**ion **O**f **P**rote**I**ns. A deep learning framework for Cryo-ET 3D particle picking with autonomous model exploration capabilities.
38
+
39
+ ## 🚀 Introduction
40
+
41
+ octopi addresses a critical bottleneck in cryo-electron tomography (cryo-ET) research: the efficient identification and extraction of proteins within complex cellular environments. As advances in cryo-ET enable the collection of thousands of tomograms, the need for automated, accurate particle picking has become increasingly urgent.
42
+
43
+ Our deep learning-based pipeline streamlines the training and execution of 3D autoencoder models specifically designed for cryo-ET particle picking. Built on [copick](https://github.com/copick/copick), a storage-agnostic API, octopi seamlessly accesses tomograms and segmentations across local and remote environments.
44
+
45
+ ## 🧩 Features
46
+
47
+ octopi offers a modular, deep learning-driven pipeline for:
48
+ * Training and evaluating custom 3D U-Net models for particle segmentation.
49
+ * Automatically exploring model architectures using Bayesian optimization via Optuna.
50
+ * Performing inference for both semantic segmentation and particle localization.
51
+
52
+ octopi empowers researchers to navigate the dense, intricate landscapes of cryo-ET datasets with unprecedented precision and efficiency without manual trial and error.
53
+
54
+ ## Getting Started
55
+ ### Installation
56
+
57
+ *Octopi* is available on PyPI.
58
+ ```
59
+ pip install octopi
60
+ ```
61
+
62
+ ## 📚 Usage
63
+
64
+ octopi provides a clean, scriptable command-line interface. Run the following command to view all available subcommands:
65
+ ```
66
+ octopi --help
67
+ ```
68
+ Each subcommand supports its own --help flag for detailed usage. To see practical examples of how to interface directly with the octopi API, explore the notebooks/ folder.
69
+
70
+ If you're running octopi on an HPC cluster, several SLURM-compatible submission commands are available. You can view them by running:
71
+ ```
72
+ octopi-slurm --help
73
+ ```
74
+ This provides utilities for submitting training, inference, and localization jobs in SLURM-based environments.
75
+
76
+ ### 📥 Data Import & Preprocessing
77
+
78
+ To train or run inference with octopi, your tomograms must be organized inside a CoPick project. octopi supports two primary methods for data ingestion, both of which include optional Fourier cropping to reduce resolution and accelerate downstream processing.
79
+
80
+ If your tomograms are already processed and stored locally in .mrc format (e.g., from Warp, IMOD, or AreTomo), you can import them into a new or existing CoPick project using:
81
+
82
+ ```
83
+ octopi import-mrc-volumes \
84
+ --input-folder /path/to/mrc/files --config /path/to/config.json \
85
+ --target-tomo-type denoised --input-voxel-size --output-voxel-size 10
86
+ ```
87
+
88
+ octopi can also process tomograms that are hosted on the data portal. Users can download tomograms onto their own remote machine, which is especially useful when downsampling the tomograms to a lower resolution to save time and memory. You can download and process the tomograms using:
89
+ ```
90
+ octopi download-dataportal \
91
+ --config /path/to/config.json --datasetID 10445 --overlay-path path/to/saved/zarrs \
92
+ --input-voxel-size 5 --output-voxel-size 10 \
93
+ --dataportal-name wbp --target-tomotype wbp
94
+ ```
95
+
96
+ ### 📁 Training Labels Preparation
97
+
98
+ Use `octopi create-targets` to create semantic masks for proteins of interest using annotation metadata. In this example, let's generate segmentations from picks for dataset 10439 from the CZ cryoET Dataportal (you only need to run this step once).
99
+ ```
100
+ octopi create-targets \
101
+ --config config.json \
102
+ --target apoferritin --target beta-galactosidase,slabpick,1 \
103
+ --target ribosome,pytom,0 --target virus-like-particle,pytom,0 \
104
+ --seg-target membrane \
105
+ --tomo-alg wbp --voxel-size 10 \
106
+ --target-session-id 1 --target-segmentation-name remotetargets \
107
+ --target-user-id train-octopi
108
+ ```
109
+
110
+ ### 🧠 Training a single 3D U-Net model
111
+ Train a 3D U-Net model on the prepared datasets using the prepared target segmentations. We can use tomograms derived from multiple copick projects.
112
+ ```
113
+ octopi train-model \
114
+ --config experiment,config1.json \
115
+ --config simulation,config2.json \
116
+ --voxel-size 10 --tomo-alg wbp --Nclass 8 \
117
+ --tomo-batch-size 50 --num-epochs 100 --val-interval 10 \
118
+ --target-info remotetargets,train-octopi,1
119
+ ```
120
+ Outputs will include model weights (.pth), logs, and training metrics.
121
+
122
+ ### 🔍 Model exploration with Optuna
123
+
124
+ octopi🐙 supports automatic neural architecture search using Optuna, enabling efficient discovery of optimal 3D U-Net configurations through Bayesian optimization. This allows users to maximize segmentation accuracy without manual tuning.
125
+
126
+ To launch a model exploration job:
127
+ ```
128
+ octopi model-explore \
129
+ --config experiment,/mnt/dataportal/ml_challenge/config.json \
130
+ --config simulation,/mnt/dataportal/synthetic_ml_challenge/config.json \
131
+ --voxel-size 10 --tomo-alg wbp --Nclass 8 \
132
+ --model-save-path train_results
133
+ ```
134
+ Each trial evaluates a different architecture and logs:
135
+ • Segmentation performance metrics
136
+ • Model weights and configs
137
+ • Training curves and validation loss
138
+
139
+ 🔬 Trials are automatically tracked with MLflow and saved under the specified `--model-save-path`.
140
+
141
+ #### Optuna Dashboard
142
+
143
+ To quickly assess the exploration results and observe which trials produce the best architectures, Optuna provides a dashboard that summarizes all of this information. The instructions to access the dashboard are available here - https://optuna-dashboard.readthedocs.io/en/latest/getting-started.html, and it is recommended to use either the VS Code extension or the CLI.
144
+
145
+ #### 📊 MLflow experiment tracking
146
+
147
+ To use the CZI cloud MLflow tracker, add a `.env` file in the root directory like the one below. You can get a CZI MLflow access token from [here](https://mlflow.cw.use4-prod.si.czi.technology/api/2.0/mlflow/users/access-token) (note that a new token will be generated every time you open this site).
148
+ ```
149
+ MLFLOW_TRACKING_USERNAME = <Your_CZ_email>
150
+ MLFLOW_TRACKING_PASSWORD = <Your_mlflow_access_token>
151
+ ```
152
+
153
+ octopi supports MLflow for logging and visualizing model training and hyperparameter search results, including:
154
+ • Training loss/validation metrics over time
155
+ • Model hyperparameters and architecture details
156
+ • Trial comparison (e.g., best performing model)
157
+
158
+ You can use either a local MLflow instance, a remote (HPC) instance, or the CZI cloud server:
159
+
160
+ #### 🧪 Local MLflow Dashboard
161
+
162
+ To inspect results locally: `mlflow ui` and open http://localhost:5000 in your browser.
163
+
164
+ #### 🖥️ HPC Cluster MLflow Access (Remote via SSH tunnel)
165
+
166
+ If running octopi on a remote cluster (e.g., Biohub Bruno), forward the MLflow port.
167
+ On your local machine:
168
+ `ssh -L 5000:localhost:5000 remote_username@remote_host` (in the case of Bruno the remote would be `login01.czbiohub.org`).
169
+
170
+ Then, on the remote terminal (login node), run `mlflow ui --host 0.0.0.0 --port 5000` to launch the MLflow dashboard, which you can then view in a local browser.
171
+
172
+ #### ☁️ CZI coreweave cluster
173
+
174
+ For the CZI coreweave cluster, MLflow is already hosted. Go to the CZI [MLflow server](https://mlflow.cw.use4-prod.si.czi.technology/).
175
+
176
+ 🔐 A .env file is required to authenticate (see the MLflow experiment tracking section above).
177
+ 📁 Be sure to register your project name in MLflow before launching runs.
178
+
179
+ ### 🔮 Segmentation
180
+ Generate segmentation prediction masks for tomograms in a given copick project.
181
+ ```
182
+ octopi inference \
183
+ --config config.json \
184
+ --seg-info predict,unet,1 \
185
+ --model-config train_results/best_model_config.yaml \
186
+ --model-weights train_results/best_model.pth \
187
+ --voxel-size 10 --tomo-alg wbp --tomo-batch-size 25
188
+ ```
189
+ Output masks will be saved to the corresponding copick project under the `seg-info` input.
190
+
191
+ ### 📍 Localization
192
+ Convert the segmentation masks into particle coordinates.
193
+ ```
194
+ octopi localize \
195
+ --config config.json \
196
+ --pick-session-id 1 --pick-user-id unet \
197
+ --seg-info predict,unet,1
198
+ ```
199
+
200
+ ## Contributing
201
+
202
+ This project adheres to the Contributor Covenant code of conduct. By participating, you are expected to uphold this code. Please report unacceptable behavior to opensource@chanzuckerberg.com.
203
+
204
+ ## Reporting Security Issues
205
+
206
+ Please note: If you believe you have found a security issue, please responsibly disclose by contacting us at security@chanzuckerberg.com.
207
+
208
+
209
+
octopi-1.0/README.md ADDED
@@ -0,0 +1,173 @@
1
+ # OCTOPI 🐙🐙🐙
2
+ **O**bject dete**CT**ion **O**f **P**rote**I**ns. A deep learning framework for Cryo-ET 3D particle picking with autonomous model exploration capabilities.
3
+
4
+ ## 🚀 Introduction
5
+
6
+ octopi addresses a critical bottleneck in cryo-electron tomography (cryo-ET) research: the efficient identification and extraction of proteins within complex cellular environments. As advances in cryo-ET enable the collection of thousands of tomograms, the need for automated, accurate particle picking has become increasingly urgent.
7
+
8
+ Our deep learning-based pipeline streamlines the training and execution of 3D autoencoder models specifically designed for cryo-ET particle picking. Built on [copick](https://github.com/copick/copick), a storage-agnostic API, octopi seamlessly accesses tomograms and segmentations across local and remote environments.
9
+
10
+ ## 🧩 Features
11
+
12
+ octopi offers a modular, deep learning-driven pipeline for:
13
+ * Training and evaluating custom 3D U-Net models for particle segmentation.
14
+ * Automatically exploring model architectures using Bayesian optimization via Optuna.
15
+ * Performing inference for both semantic segmentation and particle localization.
16
+
17
+ octopi empowers researchers to navigate the dense, intricate landscapes of cryo-ET datasets with unprecedented precision and efficiency without manual trial and error.
18
+
19
+ ## Getting Started
20
+ ### Installation
21
+
22
+ *Octopi* is available on PyPI.
23
+ ```
24
+ pip install octopi
25
+ ```
26
+
27
+ ## 📚 Usage
28
+
29
+ octopi provides a clean, scriptable command-line interface. Run the following command to view all available subcommands:
30
+ ```
31
+ octopi --help
32
+ ```
33
+ Each subcommand supports its own --help flag for detailed usage. To see practical examples of how to interface directly with the octopi API, explore the notebooks/ folder.
34
+
35
+ If you're running octopi on an HPC cluster, several SLURM-compatible submission commands are available. You can view them by running:
36
+ ```
37
+ octopi-slurm --help
38
+ ```
39
+ This provides utilities for submitting training, inference, and localization jobs in SLURM-based environments.
40
+
41
+ ### 📥 Data Import & Preprocessing
42
+
43
+ To train or run inference with octopi, your tomograms must be organized inside a CoPick project. octopi supports two primary methods for data ingestion, both of which include optional Fourier cropping to reduce resolution and accelerate downstream processing.
44
+
45
+ If your tomograms are already processed and stored locally in .mrc format (e.g., from Warp, IMOD, or AreTomo), you can import them into a new or existing CoPick project using:
46
+
47
+ ```
48
+ octopi import-mrc-volumes \
49
+ --input-folder /path/to/mrc/files --config /path/to/config.json \
50
+ --target-tomo-type denoised --input-voxel-size --output-voxel-size 10
51
+ ```
52
+
53
+ octopi can also process tomograms that are hosted on the data portal. Users can download tomograms onto their own remote machine, which is especially useful when downsampling the tomograms to a lower resolution to save time and memory. You can download and process the tomograms using:
54
+ ```
55
+ octopi download-dataportal \
56
+ --config /path/to/config.json --datasetID 10445 --overlay-path path/to/saved/zarrs \
57
+ --input-voxel-size 5 --output-voxel-size 10 \
58
+ --dataportal-name wbp --target-tomotype wbp
59
+ ```
60
+
61
+ ### 📁 Training Labels Preparation
62
+
63
+ Use `octopi create-targets` to create semantic masks for proteins of interest using annotation metadata. In this example, let's generate segmentations from picks for dataset 10439 from the CZ cryoET Dataportal (you only need to run this step once).
64
+ ```
65
+ octopi create-targets \
66
+ --config config.json \
67
+ --target apoferritin --target beta-galactosidase,slabpick,1 \
68
+ --target ribosome,pytom,0 --target virus-like-particle,pytom,0 \
69
+ --seg-target membrane \
70
+ --tomo-alg wbp --voxel-size 10 \
71
+ --target-session-id 1 --target-segmentation-name remotetargets \
72
+ --target-user-id train-octopi
73
+ ```
74
+
75
+ ### 🧠 Training a single 3D U-Net model
76
+ Train a 3D U-Net model on the prepared datasets using the prepared target segmentations. We can use tomograms derived from multiple copick projects.
77
+ ```
78
+ octopi train-model \
79
+ --config experiment,config1.json \
80
+ --config simulation,config2.json \
81
+ --voxel-size 10 --tomo-alg wbp --Nclass 8 \
82
+ --tomo-batch-size 50 --num-epochs 100 --val-interval 10 \
83
+ --target-info remotetargets,train-octopi,1
84
+ ```
85
+ Outputs will include model weights (.pth), logs, and training metrics.
86
+
87
+ ### 🔍 Model exploration with Optuna
88
+
89
+ octopi🐙 supports automatic neural architecture search using Optuna, enabling efficient discovery of optimal 3D U-Net configurations through Bayesian optimization. This allows users to maximize segmentation accuracy without manual tuning.
90
+
91
+ To launch a model exploration job:
92
+ ```
93
+ octopi model-explore \
94
+ --config experiment,/mnt/dataportal/ml_challenge/config.json \
95
+ --config simulation,/mnt/dataportal/synthetic_ml_challenge/config.json \
96
+ --voxel-size 10 --tomo-alg wbp --Nclass 8 \
97
+ --model-save-path train_results
98
+ ```
99
+ Each trial evaluates a different architecture and logs:
100
+ • Segmentation performance metrics
101
+ • Model weights and configs
102
+ • Training curves and validation loss
103
+
104
+ 🔬 Trials are automatically tracked with MLflow and saved under the specified `--model-save-path`.
105
+
106
+ #### Optuna Dashboard
107
+
108
+ To quickly assess the exploration results and observe which trials produce the best architectures, Optuna provides a dashboard that summarizes all of this information. The instructions to access the dashboard are available here - https://optuna-dashboard.readthedocs.io/en/latest/getting-started.html, and it is recommended to use either the VS Code extension or the CLI.
109
+
110
+ #### 📊 MLflow experiment tracking
111
+
112
+ To use the CZI cloud MLflow tracker, add a `.env` file in the root directory like the one below. You can get a CZI MLflow access token from [here](https://mlflow.cw.use4-prod.si.czi.technology/api/2.0/mlflow/users/access-token) (note that a new token will be generated every time you open this site).
113
+ ```
114
+ MLFLOW_TRACKING_USERNAME = <Your_CZ_email>
115
+ MLFLOW_TRACKING_PASSWORD = <Your_mlflow_access_token>
116
+ ```
117
+
118
+ octopi supports MLflow for logging and visualizing model training and hyperparameter search results, including:
119
+ • Training loss/validation metrics over time
120
+ • Model hyperparameters and architecture details
121
+ • Trial comparison (e.g., best performing model)
122
+
123
+ You can use either a local MLflow instance, a remote (HPC) instance, or the CZI cloud server:
124
+
125
+ #### 🧪 Local MLflow Dashboard
126
+
127
+ To inspect results locally: `mlflow ui` and open http://localhost:5000 in your browser.
128
+
129
+ #### 🖥️ HPC Cluster MLflow Access (Remote via SSH tunnel)
130
+
131
+ If running octopi on a remote cluster (e.g., Biohub Bruno), forward the MLflow port.
132
+ On your local machine:
133
+ `ssh -L 5000:localhost:5000 remote_username@remote_host` (in the case of Bruno the remote would be `login01.czbiohub.org`).
134
+
135
+ Then, on the remote terminal (login node), run `mlflow ui --host 0.0.0.0 --port 5000` to launch the MLflow dashboard, which you can then view in a local browser.
136
+
137
+ #### ☁️ CZI coreweave cluster
138
+
139
+ For the CZI coreweave cluster, MLflow is already hosted. Go to the CZI [MLflow server](https://mlflow.cw.use4-prod.si.czi.technology/).
140
+
141
+ 🔐 A .env file is required to authenticate (see the MLflow experiment tracking section above).
142
+ 📁 Be sure to register your project name in MLflow before launching runs.
143
+
144
+ ### 🔮 Segmentation
145
+ Generate segmentation prediction masks for tomograms in a given copick project.
146
+ ```
147
+ octopi inference \
148
+ --config config.json \
149
+ --seg-info predict,unet,1 \
150
+ --model-config train_results/best_model_config.yaml \
151
+ --model-weights train_results/best_model.pth \
152
+ --voxel-size 10 --tomo-alg wbp --tomo-batch-size 25
153
+ ```
154
+ Output masks will be saved to the corresponding copick project under the `seg-info` input.
155
+
156
+ ### 📍 Localization
157
+ Convert the segmentation masks into particle coordinates.
158
+ ```
159
+ octopi localize \
160
+ --config config.json \
161
+ --pick-session-id 1 --pick-user-id unet \
162
+ --seg-info predict,unet,1
163
+ ```
164
+
165
+ ## Contributing
166
+
167
+ This project adheres to the Contributor Covenant code of conduct. By participating, you are expected to uphold this code. Please report unacceptable behavior to opensource@chanzuckerberg.com.
168
+
169
+ ## Reporting Security Issues
170
+
171
+ Please note: If you believe you have found a security issue, please responsibly disclose by contacting us at security@chanzuckerberg.com.
172
+
173
+
File without changes
File without changes
@@ -0,0 +1,84 @@
1
+ from monai.transforms import (
2
+ Compose,
3
+ RandFlipd,
4
+ Orientationd,
5
+ RandRotate90d,
6
+ NormalizeIntensityd,
7
+ EnsureChannelFirstd,
8
+ RandCropByLabelClassesd,
9
+ RandScaleIntensityd,
10
+ RandShiftIntensityd,
11
+ RandAdjustContrastd,
12
+ RandGaussianNoised,
13
+ ScaleIntensityRanged,
14
+ RandomOrder,
15
+ )
16
+
17
def get_transforms():
    """
    Build the deterministic (non-random) preprocessing pipeline.

    Returns:
        Compose: a pipeline that, in order,
            1. applies EnsureChannelFirstd to "image" and "label", declaring
               that the inputs carry no channel dimension,
            2. applies NormalizeIntensityd to "image" only,
            3. applies Orientationd with axis codes "RAS" to "image" and "label".
    """
    paired_keys = ["image", "label"]

    steps = [
        EnsureChannelFirstd(keys=paired_keys, channel_dim="no_channel"),
        NormalizeIntensityd(keys="image"),
        Orientationd(keys=paired_keys, axcodes="RAS"),
    ]
    return Compose(steps)
26
+
27
def get_random_transforms(input_dim, num_samples, Nclasses):
    """
    Build the random cropping and augmentation pipeline.

    Args:
        input_dim: indexable of (nx, ny, nz); its first three entries are used
            as the spatial size of every crop.
        num_samples: int, passed to RandCropByLabelClassesd as `num_samples`.
        Nclasses: int, passed to RandCropByLabelClassesd as `num_classes`.

    Returns:
        Compose: label-class-driven random crops, a random 90-degree rotation
        in the plane of spatial axes 1 and 2, random flips, and finally a
        randomly ordered group of intensity augmentations on "image".

    Rationale (from the original author): the data has a missing wedge along
    the first axis, which smears the signal in that direction. Rotations are
    therefore restricted to spatial_axes=[1, 2], i.e. around the first axis,
    so the artifact is never moved onto another axis.

    NOTE(review): the original description also states that flips along the
    first axis are avoided, yet a flip with spatial_axis=0 is still part of
    the pipeline below (it was annotated "Removed" but never removed).
    Confirm which behaviour is intended; it is kept here unchanged.
    """
    paired_keys = ["image", "label"]

    # Crop patches of the requested size, driven by the label classes.
    crop = RandCropByLabelClassesd(
        keys=paired_keys,
        label_key="label",
        spatial_size=[input_dim[0], input_dim[1], input_dim[2]],
        num_classes=Nclasses,
        num_samples=num_samples,
    )

    # Rotation stays in the (1, 2) plane to keep the wedge orientation fixed.
    rotate = RandRotate90d(keys=paired_keys, prob=0.5, spatial_axes=[1, 2], max_k=3)

    # One independent coin-flip per spatial axis (axis 0 included, see NOTE above).
    flips = [
        RandFlipd(keys=paired_keys, prob=0.5, spatial_axis=axis)
        for axis in (0, 1, 2)
    ]

    # Intensity-only augmentations touch "image" and are applied in random order.
    intensity = RandomOrder([
        RandScaleIntensityd(keys="image", prob=0.5, factors=(0.85, 1.15)),
        RandShiftIntensityd(keys="image", prob=0.5, offsets=(-0.15, 0.15)),
        RandAdjustContrastd(keys="image", prob=0.5, gamma=(0.85, 1.15)),
        RandGaussianNoised(keys="image", prob=0.5, mean=0.0, std=0.5),
    ])

    return Compose([crop, rotate, *flips, intensity])
62
+
63
+ # Augmentations to Explore in the Future:
64
+ # Intensity-based augmentations
65
+ # RandHistogramShiftd(keys="image", prob=0.5, num_control_points=(3, 5))
66
+ # RandGaussianSmoothd(keys="image", prob=0.5, sigma_x=(0.5, 1.5), sigma_y=(0.5, 1.5), sigma_z=(0.5, 1.5)),
67
+
68
+ # Geometric Transforms
69
+ # RandAffined(
70
+ # keys=["image", "label"],
71
+ # rotate_range=(0.1, 0.1, 0.1), # Rotation angles (radians) for x, y, z axes
72
+ # scale_range=(0.1, 0.1, 0.1), # Scale range for isotropic/anisotropic scaling
73
+ # prob=0.5, # Probability of applying the transform
74
+ # padding_mode="border" # Handle out-of-bounds values
75
+ # )
76
+
77
def get_predict_transforms():
    """
    Build the preprocessing pipeline used at prediction time.

    Only the "image" key is processed (no "label" is referenced).

    Returns:
        Compose: EnsureChannelFirstd (inputs declared as having no channel
        dimension) followed by NormalizeIntensityd, both on "image".
    """
    add_channel = EnsureChannelFirstd(keys=["image"], channel_dim="no_channel")
    normalize = NormalizeIntensityd(keys="image")
    return Compose([add_channel, normalize])
@@ -0,0 +1,113 @@
1
+ from typing import List, Tuple, Callable, Optional, Dict, Any
2
+ from monai.transforms import Compose
3
+ from monai.data import CacheDataset
4
+ from octopi import io
5
+ from tqdm import tqdm
6
+ import os, sys
7
+
8
class MultiConfigCacheDataset(CacheDataset):
    """
    A CacheDataset that gathers training data from several sources
    (one root per session, looked up on `manager.roots`) and caches the
    transformed items while the data is being loaded.
    """

    def __init__(
        self,
        manager,
        run_ids: List[Tuple[str, str]],
        transform: Optional[Callable] = None,
        cache_rate: float = 1.0,
        num_workers: int = 0,
        progress: bool = True,
        copy_cache: bool = True,
        cache_num: int = sys.maxsize
    ):
        """
        Args:
            manager: object exposing `roots`, `voxel_size`, `tomo_algorithm`,
                `target_name`, `target_session_id` and `target_user_id`
                (and optionally `_check_max_label_value`).
            run_ids: (session_name, run_name) pairs to load.
            transform: transform forwarded to the parent CacheDataset.
            cache_rate: probability with which each loaded item is cached.
            num_workers: forwarded to the parent CacheDataset.
            progress: whether to print/show loading progress.
            copy_cache: forwarded to the parent CacheDataset.
            cache_num: forwarded to the parent CacheDataset.
        """
        # Save reference to manager and run_ids
        self.manager = manager
        self.run_ids = run_ids
        self.progress = progress

        # _fill_cache reads self.cache_rate, but nothing in this class set it.
        # Store it before the parent constructor runs, in case the parent
        # triggers _fill_cache during initialisation.
        self.cache_rate = cache_rate

        # Prepare empty data list first - don't load immediately
        self.data = []

        # Initialize the parent CacheDataset with an empty list; loading and
        # caching are handled together by the overridden _fill_cache.
        super().__init__(
            data=[],  # Empty list - data is loaded in _fill_cache
            transform=transform,
            cache_rate=cache_rate,
            num_workers=num_workers,
            progress=False,  # Progress is reported by this class instead
            copy_cache=copy_cache,
            cache_num=cache_num
        )

    def _fill_cache(self, indices=None):
        """
        Load every (session, run) pair and cache transformed items on the fly.

        Args:
            indices: ignored. Accepted only so that a parent implementation
                that calls `_fill_cache(indices)` does not fail with a
                TypeError — TODO confirm against the installed MONAI version.

        Returns:
            The cache list (`self._cache`). Existing callers in this class
            ignore the return value; it is returned so that a parent which
            assigns the result to `self._cache` keeps a valid list.
        """
        # numpy was referenced here without being imported at module level,
        # which raised NameError as soon as the caching branch was reached.
        import numpy as np

        if self.progress:
            print("Loading and caching dataset...")

        # Load and process data in a single operation
        self.data = []
        iterator = tqdm(self.run_ids, desc="Loading dataset") if self.progress else self.run_ids

        for session_name, run_name in iterator:
            root = self.manager.roots[session_name]
            batch_data = io.load_training_data(
                root,
                [run_name],
                self.manager.voxel_size,
                self.manager.tomo_algorithm,
                self.manager.target_name,
                self.manager.target_session_id,
                self.manager.target_user_id,
                progress_update=False
            )

            self.data.extend(batch_data)

            # Process and cache this batch right away
            for item in batch_data:
                if len(self._cache) < self.cache_num and self.cache_rate > 0.0:
                    if np.random.random() < self.cache_rate:
                        # NOTE(review): `_transform` is inherited; confirm it
                        # accepts a data item here rather than an index.
                        self._cache.append(self._transform(item))

        # Check max label value if the manager supports it
        if hasattr(self.manager, '_check_max_label_value'):
            self.manager._check_max_label_value(self.data)

        # Update the _data attribute to match the loaded data
        self._data = self.data

        return self._cache

    def __len__(self):
        """
        Return the number of loaded items, loading the data first if needed.
        """
        if not self.data:
            self._fill_cache()  # Load data if not loaded yet
        return len(self.data)

    def __getitem__(self, index):
        """
        Return the item at `index`: the cached entry when `index` falls inside
        the cache, otherwise the item transformed on the fly.
        """
        if not self.data:
            self._fill_cache()  # Load data if not loaded yet

        # NOTE(review): items are cached with probability `cache_rate`, so the
        # cache position may not line up with the data index — confirm.
        if index < len(self._cache):
            return self._cache[index]

        # Otherwise transform on-the-fly
        return self._transform(self.data[index])
105
+
106
+ # TODO: Implement Single Config Cache Dataset
107
+ # class SingleConfigCacheDataset(CacheDataset):
108
+ # def __init__(self,
109
+ # root: Any,
110
+ # run_ids: List[str],
111
+ # voxel_size: float,
112
+ # tomo_algorithm: str,
113
+ # target_name: str,
@@ -0,0 +1,19 @@
1
+ from torch.utils.data import Dataset
2
+
3
class DynamicDataset(Dataset):
    """
    Map-style dataset whose backing collection can be replaced after
    construction via `update_data`.
    """

    def __init__(self, data, transform=None):
        """
        Args:
            data: indexable, sized collection of samples.
            transform: optional callable applied to each sample on access.
        """
        self.data, self.transform = data, transform

    def __len__(self):
        """Return the number of samples currently held."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return sample `idx`, passed through the transform when one is set."""
        item = self.data[idx]
        # A falsy transform (e.g. None) means the raw sample is returned.
        return self.transform(item) if self.transform else item

    def update_data(self, new_data):
        """Replace the stored samples with `new_data`."""
        self.data = new_data