copick-utils 1.0.2__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {copick_utils-1.0.2 → copick_utils-1.2.0}/.github/workflows/conventional-commits.yml +1 -1
- copick_utils-1.2.0/.release-please.manifest.json +3 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/CHANGELOG.md +24 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/PKG-INFO +2 -1
- {copick_utils-1.0.2 → copick_utils-1.2.0}/pyproject.toml +5 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/__init__.py +1 -1
- copick_utils-1.2.0/src/copick_utils/cli/download.py +34 -0
- copick_utils-1.2.0/src/copick_utils/cli/download_commands.py +11 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/processing_commands.py +2 -0
- copick_utils-1.2.0/src/copick_utils/cli/split_labels.py +148 -0
- copick_utils-1.2.0/src/copick_utils/io/portal.py +149 -0
- copick_utils-1.2.0/src/copick_utils/io/readers.py +200 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/io/writers.py +9 -9
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/__init__.py +0 -4
- copick_utils-1.2.0/src/copick_utils/process/split_labels.py +214 -0
- copick_utils-1.0.2/.release-please.manifest.json +0 -3
- copick_utils-1.0.2/src/copick_utils/io/readers.py +0 -135
- {copick_utils-1.0.2 → copick_utils-1.2.0}/.github/dependabot.yml +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/.github/workflows/py-formatting.yml +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/.github/workflows/release-please.yml +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/.gitignore +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/.pre-commit-config.yaml +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/LICENSE +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/README.md +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/SECURITY.md +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/examples/segmentation_example.ipynb +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/release-please.config.json +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/__init__.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/clipmesh.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/clippicks.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/clipseg.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/conversion_commands.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/enclosed.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/filter_components.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/fit_spline.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/hull.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/input_output_selection.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/logical_commands.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/mesh2picks.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/mesh2seg.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/meshop.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picks2ellipsoid.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picks2mesh.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picks2plane.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picks2seg.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picks2sphere.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picks2surface.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picksin.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picksout.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/seg2mesh.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/seg2picks.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/segop.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/separate_components.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/skeletonize.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/util.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/validbox.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/__init__.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/converter_common.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/ellipsoid_from_picks.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/lazy_converter.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/mesh_from_picks.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/mesh_from_segmentation.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/picks_from_mesh.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/picks_from_segmentation.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/plane_from_picks.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/segmentation_from_mesh.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/segmentation_from_picks.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/sphere_from_picks.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/surface_from_picks.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/features/__init__.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/features/skimage.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/io/__init__.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/logical/__init__.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/logical/distance_operations.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/logical/enclosed_operations.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/logical/mesh_operations.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/logical/point_operations.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/logical/segmentation_operations.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/pickers/__init__.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/pickers/grid_picker.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/connected_components.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/filter_components.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/hull.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/skeletonize.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/spline_fitting.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/validbox.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/util/__init__.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/util/config_models.py +0 -0
- {copick_utils-1.0.2 → copick_utils-1.2.0}/tests/__init__.py +0 -0
|
@@ -1,5 +1,29 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [1.2.0](https://github.com/copick/copick-utils/compare/copick-utils-v1.1.0...copick-utils-v1.2.0) (2026-01-31)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### ✨ Features
|
|
7
|
+
|
|
8
|
+
* add parser uri to readers, add function to download portal project fo… ([#43](https://github.com/copick/copick-utils/issues/43)) ([7743ee5](https://github.com/copick/copick-utils/commit/7743ee5facb6dff037f8348852c6edcd105abe25))
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### 🐞 Bug Fixes
|
|
12
|
+
|
|
13
|
+
* URI in copick readers ([#45](https://github.com/copick/copick-utils/issues/45)) ([926f01d](https://github.com/copick/copick-utils/commit/926f01dfccb537bca90fd30ee59ceacb83d7f947))
|
|
14
|
+
|
|
15
|
+
## [1.1.0](https://github.com/copick/copick-utils/compare/copick-utils-v1.0.2...copick-utils-v1.1.0) (2026-01-26)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
### ✨ Features
|
|
19
|
+
|
|
20
|
+
* Add split command. ([#41](https://github.com/copick/copick-utils/issues/41)) ([479bf48](https://github.com/copick/copick-utils/commit/479bf48f9a50e3eb4066b3a41f2b399f110b2553))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
### 🐞 Bug Fixes
|
|
24
|
+
|
|
25
|
+
* bump chanzuckerberg/github-actions from 6.12.2 to 6.13.0 ([#40](https://github.com/copick/copick-utils/issues/40)) ([68b21c1](https://github.com/copick/copick-utils/commit/68b21c175e2ca96946ceba9252b86c17e3382fe2))
|
|
26
|
+
|
|
3
27
|
## [1.0.2](https://github.com/copick/copick-utils/compare/copick-utils-v1.0.1...copick-utils-v1.0.2) (2026-01-06)
|
|
4
28
|
|
|
5
29
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: copick-utils
|
|
3
|
-
Version: 1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Utilities for copick
|
|
5
5
|
Project-URL: Repository, https://github.com/KyleHarrington/copick-utils.git
|
|
6
6
|
Project-URL: Issues, https://github.com/KyleHarrington/copick-utils/issues
|
|
@@ -33,6 +33,7 @@ Requires-Dist: click-option-group
|
|
|
33
33
|
Requires-Dist: copick>=1.16.0
|
|
34
34
|
Requires-Dist: manifold3d
|
|
35
35
|
Requires-Dist: mapbox-earcut
|
|
36
|
+
Requires-Dist: mdocfile
|
|
36
37
|
Requires-Dist: numpy
|
|
37
38
|
Requires-Dist: rtree
|
|
38
39
|
Requires-Dist: scikit-image
|
|
@@ -29,6 +29,7 @@ dependencies = [
|
|
|
29
29
|
"trimesh",
|
|
30
30
|
"manifold3d",
|
|
31
31
|
"mapbox-earcut",
|
|
32
|
+
"mdocfile",
|
|
32
33
|
"tqdm",
|
|
33
34
|
"scikit-learn",
|
|
34
35
|
"shapely",
|
|
@@ -76,6 +77,7 @@ skeletonize = "copick_utils.cli.processing_commands:skeletonize"
|
|
|
76
77
|
fit_spline = "copick_utils.cli.processing_commands:fit_spline"
|
|
77
78
|
validbox = "copick_utils.cli.processing_commands:validbox"
|
|
78
79
|
hull = "copick_utils.cli.processing_commands:hull"
|
|
80
|
+
split = "copick_utils.cli.processing_commands:split"
|
|
79
81
|
|
|
80
82
|
[project.entry-points."copick.logical.commands"]
|
|
81
83
|
meshop = "copick_utils.cli.logical_commands:meshop"
|
|
@@ -87,6 +89,9 @@ clippicks = "copick_utils.cli.logical_commands:clippicks"
|
|
|
87
89
|
picksin = "copick_utils.cli.logical_commands:picksin"
|
|
88
90
|
picksout = "copick_utils.cli.logical_commands:picksout"
|
|
89
91
|
|
|
92
|
+
[project.entry-points."copick.download.commands"]
|
|
93
|
+
project = "copick_utils.cli.download_commands:project"
|
|
94
|
+
|
|
90
95
|
[tool.hatch.version]
|
|
91
96
|
path = "src/copick_utils/__init__.py"
|
|
92
97
|
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import click
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@click.command(
    context_settings={"show_default": True},
    short_help="Download tilt series and alignments from the CryoET Data Portal.",
    no_args_is_help=True,
)
@click.option(
    "-ds",
    "--dataset",
    required=True,
    # The portal helper is annotated with an integer dataset id, so let click
    # validate and convert the value instead of forwarding a raw string.
    type=int,
    help="Dataset ID to download from the CryoET Data Portal.",
)
@click.option(
    "-o",
    "--output",
    # No ``required=True`` here: the option has a default, so it can never be missing.
    default=".",
    type=str,
    help="Output directory to save the downloaded files.",
)
def project(dataset: int, output: str):
    """
    Download tilt series and alignments from the CryoET Data Portal for sub-tomogram averaging with py2rely.
    """
    # Thin CLI wrapper: all work happens in download_project.
    download_project(dataset, output)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def download_project(dataset: str, output: str):
    """Download the AreTomo-related files of one portal dataset into ``output``.

    The portal module is imported inside the function body, so it is only
    loaded when a download is actually requested.
    """
    from copick_utils.io.portal import download_aretomo_files

    download_aretomo_files(dataset, output)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""CLI commands for downloading data from the CryoET Data Portal.
|
|
2
|
+
|
|
3
|
+
This module imports all download commands from specialized files for better organization.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from copick_utils.cli.download import project
|
|
7
|
+
|
|
8
|
+
# All commands are now available for import by the main CLI
|
|
9
|
+
__all__ = [
|
|
10
|
+
"project",
|
|
11
|
+
]
|
|
@@ -5,6 +5,7 @@ from copick_utils.cli.fit_spline import fit_spline
|
|
|
5
5
|
from copick_utils.cli.hull import hull
|
|
6
6
|
from copick_utils.cli.separate_components import separate_components
|
|
7
7
|
from copick_utils.cli.skeletonize import skeletonize
|
|
8
|
+
from copick_utils.cli.split_labels import split
|
|
8
9
|
from copick_utils.cli.validbox import validbox
|
|
9
10
|
|
|
10
11
|
# All commands are now available for import by the main CLI
|
|
@@ -15,4 +16,5 @@ __all__ = [
|
|
|
15
16
|
"separate_components",
|
|
16
17
|
"filter_components",
|
|
17
18
|
"fit_spline",
|
|
19
|
+
"split",
|
|
18
20
|
]
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""CLI command for splitting multilabel segmentations into individual single-class segmentations."""
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
import copick
|
|
5
|
+
from click_option_group import optgroup
|
|
6
|
+
from copick.cli.util import add_config_option, add_debug_option
|
|
7
|
+
from copick.util.log import get_logger
|
|
8
|
+
from copick.util.uri import parse_copick_uri
|
|
9
|
+
|
|
10
|
+
from copick_utils.cli.util import add_input_option, add_workers_option
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@click.command(
    context_settings={"show_default": True},
    short_help="Split multilabel segmentations into single-class segmentations.",
    no_args_is_help=True,
)
@add_config_option
@optgroup.group("\nInput Options", help="Options related to the input segmentation.")
@optgroup.option(
    "--run-names",
    "-r",
    multiple=True,
    help="Specific run names to process (default: all runs).",
)
@add_input_option("segmentation")
@optgroup.group("\nTool Options", help="Options related to this tool.")
@add_workers_option
@optgroup.group("\nOutput Options", help="Options related to output segmentations.")
@optgroup.option(
    "--output-user-id",
    type=str,
    default="split",
    help="User ID for output segmentations.",
)
@add_debug_option
def split(
    config,
    run_names,
    input_uri,
    workers,
    output_user_id,
    debug,
):
    """
    Split multilabel segmentations into individual single-class binary segmentations.

    This command takes a multilabel segmentation and creates separate binary segmentations
    for each label value. Each output segmentation is named after the corresponding
    PickableObject (as defined in the copick config) and uses the same session ID as
    the input.

    \b
    URI Format:
        Segmentations: name:user_id/session_id@voxel_spacing

    \b
    Label-to-Object Mapping:
        The tool looks up each label value in the pickable_objects configuration
        and uses the object name for the output segmentation:
        - Label 1 (ribosome) → ribosome:split/session-001@10.0
        - Label 2 (membrane) → membrane:split/session-001@10.0
        - Label 3 (proteasome) → proteasome:split/session-001@10.0

    \b
    Examples:
        # Split multilabel segmentation (outputs named by pickable objects)
        copick process split -i "predictions:model/run-001@10.0"

        # Split with custom output user ID
        copick process split -i "classes:annotator/manual@10.0" --output-user-id "per-class"

        # Process specific runs only
        copick process split -i "labels:*/*@10.0" --run-names TS_001 --run-names TS_002
    """

    log = get_logger(__name__, debug=debug)

    project_root = copick.from_file(config)
    # An empty --run-names tuple is passed on as None.
    selected_runs = list(run_names) if run_names else None

    # Turn the input URI into its components; report malformed URIs as a CLI parameter error.
    try:
        parsed = parse_copick_uri(input_uri, "segmentation")
    except ValueError as e:
        raise click.BadParameter(f"Invalid input URI: {e}") from e

    seg_name = parsed["name"]
    seg_user = parsed["user_id"]
    seg_session = parsed["session_id"]
    spacing = parsed.get("voxel_spacing")

    # A concrete voxel spacing is mandatory.
    if spacing is None or spacing == "*":
        raise click.BadParameter("Input URI must include a specific voxel spacing (e.g., @10.0)")

    # Name, user and session must be exact as well: no wildcard patterns allowed.
    if any("*" in field for field in (seg_name, seg_user, seg_session)):
        raise click.BadParameter(
            "Input URI cannot contain wildcards for splitting. "
            "Please specify exact segmentation name, user_id, and session_id.",
        )

    log.info(f"Splitting multilabel segmentation '{seg_name}'")
    log.debug(f"Input: {seg_user}/{seg_session} @ {spacing}Å")
    log.debug(f"Output user ID: {output_user_id}")
    log.debug(f"Workers: {workers}")

    # Deferred import of the batch implementation.
    from copick_utils.process.split_labels import split_labels_batch

    results = split_labels_batch(
        root=project_root,
        segmentation_name=seg_name,
        segmentation_user_id=seg_user,
        segmentation_session_id=seg_session,
        voxel_spacing=float(spacing),
        output_user_id=output_user_id,
        run_names=selected_runs,
        workers=workers,
    )

    # Fold the per-run result dicts into summary numbers in a single pass.
    runs_ok = 0
    labels_total = 0
    created_names = set()
    failures = []
    for outcome in results.values():
        if not outcome:
            continue
        if outcome.get("processed", 0) > 0:
            runs_ok += 1
        labels_total += outcome.get("labels_split", 0)
        names = outcome.get("object_names")
        if names:
            created_names.update(names)
        errs = outcome.get("errors")
        if errs:
            failures.extend(errs)

    log.info(f"Completed: {runs_ok}/{len(results)} runs processed successfully")
    log.info(f"Total labels split: {labels_total}")
    log.info(f"Object names created: {', '.join(sorted(created_names))}")

    if failures:
        shown = 5  # only the first few errors are printed
        log.warning(f"Encountered {len(failures)} errors during processing")
        for failure in failures[:shown]:
            log.warning(f"  - {failure}")
        hidden = len(failures) - shown
        if hidden > 0:
            log.warning(f"  ... and {hidden} more errors")
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A minimal example using minimal libraries / imports to download relevant AreTomo files
|
|
3
|
+
from the CryoET Data Portal. Downloads the corresponding files, using the run ID as the
|
|
4
|
+
base filename.
|
|
5
|
+
|
|
6
|
+
Original implementation by Daniel Ji and Utz Ermel.
|
|
7
|
+
"""
|
|
8
|
+
import multiprocessing
|
|
9
|
+
import os
|
|
10
|
+
|
|
11
|
+
import cryoet_data_portal as cdp
|
|
12
|
+
import mdocfile
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pandas as pd
|
|
15
|
+
import requests
|
|
16
|
+
import s3fs
|
|
17
|
+
|
|
18
|
+
# Module-level portal client, created when this module is imported;
# download_aretomo_files uses it to look up the dataset and its tilt series.
global_client = cdp.Client()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def download_aretomo_files(dataset_id: int, output_dir: str, processes: int = 8):
    """Download the AreTomo-related files of every tilt series in a portal dataset.

    Parameters:
    -----------
    dataset_id: int
        CryoET Data Portal dataset id. Numeric strings (as passed by the CLI) are accepted
        and converted to int.
    output_dir: str
        Directory that receives the downloaded files; created if it does not exist.
    processes: int
        Number of worker processes used for the parallel downloads.

    Raises:
    -------
    ValueError
        If ``dataset_id`` is not an integer value or no dataset with that id is returned.
    """
    # The CLI declares the dataset option as a string, while this function is annotated
    # with an integer id, so normalise before querying.
    dataset_id = int(dataset_id)
    os.makedirs(output_dir, exist_ok=True)

    print(f"Fetching tiltseries for dataset id {dataset_id}...", flush=True)
    dataset = cdp.Dataset.get_by_id(global_client, dataset_id)
    if dataset is None:
        raise ValueError(f"No dataset found for dataset id {dataset_id}")

    # a bit slow for some reason, can take some time
    tiltseries_list: list[cdp.TiltSeries] = [tiltseries for run in dataset.runs for tiltseries in run.tiltseries]

    # One job per tilt series. The second slot carries the run *id*; the worker only uses
    # it as a log label, because the tilt series id is always supplied here.
    jobs = [(dataset_id, ts.run.id, output_dir, ts.id) for ts in tiltseries_list]
    print(
        f"Found {len(jobs)} tiltseries for dataset id {dataset_id}. Starting downloads...",
        flush=True,
    )
    if not jobs:
        # Nothing to download; avoid starting a worker pool for no work.
        return

    with multiprocessing.Pool(processes=processes) as pool:
        # Drain the iterator so every job runs; an exception in a worker is re-raised here.
        for _ in pool.imap_unordered(_worker_download_aretomo_files_for_tiltseries, jobs):
            pass
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _worker_download_aretomo_files_for_tiltseries(args):
    """Pool adapter: unpack one 4-tuple job and run the per-tiltseries download."""
    ds_id, run_label, target_dir, ts_id = args
    download_aretomo_files_for_tiltseries(
        dataset_id=ds_id,
        run_name=run_label,
        output_dir=target_dir,
        tiltseries_id=ts_id,
    )
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# note: this function assumes that there is only one tiltseries per run
|
|
48
|
+
# note: the tiltseries name is equivalent to the run name
|
|
49
|
+
# if tiltseries_id is provided, will be prioritized over dataset_id + run_name
|
|
50
|
+
def _download_renamed(s3, remote_path: str, dest_file: str, run_id) -> None:
    """Copy one S3 object to ``dest_file`` and log the original and new file names."""
    s3.get(remote_path, dest_file)
    print(
        f"[{run_id}] Downloaded {os.path.basename(remote_path)} as {os.path.basename(dest_file)}.",
        flush=True,
    )


def download_aretomo_files_for_tiltseries(dataset_id: int, run_name: str, output_dir: str, tiltseries_id: int = None):
    """Download the files of a single tilt series, named after the run id.

    Files written to ``output_dir`` (``<id>`` is the id of the tilt series' run):
    ``<id>.tlt`` / ``<id>.rawtlt``, ``<id>_CTF.txt`` (if a CTF text file exists),
    ``<id>.aln`` (if exactly one alignment exists), ``<id>.mdoc``, ``<id>.mrc`` and
    ``<id>_Imod/<id>_order_list.csv``.

    Parameters:
    -----------
    dataset_id: int
        Dataset id; only used for the lookup when ``tiltseries_id`` is not given.
    run_name: str
        Run name; used for the lookup when ``tiltseries_id`` is not given and as log label.
    output_dir: str
        Target directory; created if it does not exist.
    tiltseries_id: int
        If provided, takes priority over ``dataset_id`` + ``run_name``.

    Raises:
    -------
    ValueError
        If the run-name lookup yields zero or several tilt series, or if no ``.aln``
        file (for a single alignment), no frames, or no ``.mdoc`` file is found.
    requests.HTTPError
        If the HTTPS download of the MRC file returns an error status.
    """
    print(f"[{run_name}] Downloading AreTomo files for tiltseries id {tiltseries_id}...", flush=True)

    # open() further down does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    # Each call builds its own clients; this function runs inside pool worker processes.
    client = cdp.Client()
    s3 = s3fs.S3FileSystem(anon=True)
    if not tiltseries_id:
        all_tiltseries = cdp.TiltSeries.find(
            client,
            query_filters=[cdp.TiltSeries.run.dataset_id == dataset_id, cdp.TiltSeries.run.name == run_name],
        )
        if len(all_tiltseries) == 0:
            raise ValueError(f"No tiltseries found for dataset_id {dataset_id} and run_name {run_name}")
        if len(all_tiltseries) > 1:
            raise ValueError(f"Multiple tiltseries found for dataset_id {dataset_id} and run_name {run_name}")
        tiltseries = all_tiltseries[0]
    else:
        tiltseries = cdp.TiltSeries.get_by_id(client, tiltseries_id)

    run = tiltseries.run
    run_id = run.id

    # get the s3 folder path and then glob for *.tlt / *.rawtlt files to download them,
    # renaming the base to match the run id
    s3_folder_path = tiltseries.s3_mrc_file.rsplit("/", 1)[0] + "/"
    tlt_files = s3.glob(s3_folder_path + "*.tlt") + s3.glob(s3_folder_path + "*.rawtlt")
    for tlt_file in tlt_files:
        ext = os.path.splitext(tlt_file)[1]
        _download_renamed(s3, tlt_file, os.path.join(output_dir, f"{run_id}{ext}"), run_id)

    # do the same for "*CTF*.txt" files and "*ctf*.txt" files (first match wins)
    ctf_files = s3.glob(s3_folder_path + "*CTF*.txt") + s3.glob(s3_folder_path + "*ctf*.txt")
    if len(ctf_files) == 0:
        print(f"WARNING: No CTF files found for tiltseries id {tiltseries.id}")
    else:
        ctf_file = ctf_files[0]
        if len(ctf_files) > 1:
            print(
                f"WARNING: Multiple CTF files found for tiltseries id {tiltseries.id}, "
                f"using {os.path.basename(ctf_file)}",
            )
        _download_renamed(s3, ctf_file, os.path.join(output_dir, f"{run_id}_CTF.txt"), run_id)

    # now find the corresponding alignment for this tiltseries and download the "*.aln" file;
    # zero or several alignments only produce a warning and the .aln download is skipped
    alignments = tiltseries.alignments
    if len(alignments) == 0:
        print(f"WARNING: No alignments found for tiltseries id {tiltseries.id}")
    elif len(alignments) > 1:
        print(f"WARNING: Multiple alignments found for tiltseries id {tiltseries.id}")
    else:
        alignment = alignments[0]
        s3_alignment_folder_path = alignment.s3_alignment_metadata.rsplit("/", 1)[0] + "/"
        aln_files = s3.glob(s3_alignment_folder_path + "*.aln")
        if len(aln_files) == 0:
            raise ValueError(f"No .aln files found for run name {run.name} and alignment id {alignment.id}")
        aln_file = aln_files[0]
        if len(aln_files) > 1:
            print(
                f"WARNING: Multiple .aln files found for run name {run.name}, using {os.path.basename(aln_file)}",
            )
        ext = os.path.splitext(aln_file)[1]
        _download_renamed(s3, aln_file, os.path.join(output_dir, f"{run_id}{ext}"), run_id)

    # now get the mdoc file from the Frames/ folder (located via the first frame's path)
    frames = run.frames
    if len(frames) == 0:
        raise ValueError(f"No frames found for run name {run.name}")
    s3_frames_folder_path = frames[0].s3_frame_path.rsplit("/", 1)[0] + "/"
    mdoc_files = s3.glob(s3_frames_folder_path + "*.mdoc")
    if len(mdoc_files) == 0:
        raise ValueError(f"No .mdoc files found for run name {run.name}")
    mdoc_file = mdoc_files[0]
    if len(mdoc_files) > 1:
        print(f"WARNING: Multiple .mdoc files found for run name {run.name}, using {os.path.basename(mdoc_file)}")
    ext = os.path.splitext(mdoc_file)[1]
    mdoc_dest = os.path.join(output_dir, f"{run_id}{ext}")
    _download_renamed(s3, mdoc_file, mdoc_dest, run_id)

    # download tiltseries mrc file over HTTPS; the context manager releases the connection
    # even if writing fails, and the timeout keeps a stalled transfer from hanging a worker
    tiltseries_file = os.path.join(output_dir, f"{run_id}.mrc")
    with requests.get(tiltseries.https_mrc_file, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(tiltseries_file, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
    print(f"[{run_id}] Downloaded tiltseries mrc file as {os.path.basename(tiltseries_file)}.", flush=True)

    # create imod file for order list: 1-based image number next to the mdoc tilt angle
    mdoc = mdocfile.read(mdoc_dest)
    order_list = mdoc["TiltAngle"]
    imodpath = os.path.join(output_dir, f"{run_id}_Imod")
    os.makedirs(imodpath, exist_ok=True)
    number = np.arange(len(order_list)) + 1

    # save in csv with 'ImageNumber', 'TiltAngle' headers
    df = pd.DataFrame({"ImageNumber": number, "TiltAngle": order_list})
    df.to_csv(os.path.join(imodpath, f"{run_id}_order_list.csv"), index=False)
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from copick.util.uri import resolve_copick_objects
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def tomogram(run, voxel_size: float = 10, algorithm: str = "wbp", raise_error: bool = False, verbose=True):
    """
    Reads a tomogram from a Copick run.

    The tomogram is looked up through the Copick URI ``<algorithm>@<voxel_size>``. If that
    fails for any reason, the run is inspected to build a helpful message (available voxel
    spacings or available algorithms) and the failure is reported according to
    ``raise_error`` / ``verbose``.

    Parameters:
    -----------
    run: copick.Run
    voxel_size: float
    algorithm: str
    raise_error: bool - Raise a ValueError instead of returning None on failure.
    verbose: bool - Print the failure message when not raising.

    Returns:
    --------
    vol: np.ndarray - The tomogram, or None if it could not be read and raise_error is False.

    Raises:
    -------
    ValueError - If the tomogram could not be read and raise_error is True.
    """

    uri = f"{algorithm}@{voxel_size}"

    # Get the tomogram from the Copick URI
    try:
        vol = resolve_copick_objects(uri, run.root, "tomogram", run_name=run.name)
        return vol[0].numpy()
    except Exception as err:
        # Work out which object is missing so the message can list the alternatives.
        voxel_spacing_obj = run.get_voxel_spacing(voxel_size)

        if voxel_spacing_obj is None:
            # Report to the user which voxel spacings they can use
            available_spacings = [vs.voxel_size for vs in run.voxel_spacings]
            message = (
                f"[Warning] No tomogram found for {run.name} with uri: {uri}\n"
                f"Available voxel sizes are: {', '.join(map(str, available_spacings))}"
            )
        elif voxel_spacing_obj.get_tomogram(algorithm) is None:
            # Report to the user which algorithms are available
            available_algorithms = [tomo.tomo_type for tomo in voxel_spacing_obj.tomograms]
            message = (
                f"[Warning] No tomogram found for {run.name} with uri: {uri}\n"
                f"Available algorithms @{voxel_size}A are: {', '.join(map(str, available_algorithms))}"
            )
        else:
            # Spacing and tomogram both exist, so the failure has another cause. Report it
            # instead of silently returning None.
            message = f"[Warning] Failed to read tomogram for {run.name} with uri: {uri}\nReason: {err!r}"

        if raise_error:
            raise ValueError(message) from err
        if verbose:
            print(message)
        return None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def segmentation(run, voxel_spacing: float, name: str, user_id=None, session_id=None, raise_error=False, verbose=True):
    """
    Reads a segmentation from a Copick run.

    The segmentation is looked up through a Copick URI built from the arguments:
    ``name@spacing``, ``name:user_id@spacing`` or ``name:user_id/session_id@spacing``.
    When only ``session_id`` is given, the user part is written as the ``*`` wildcard.

    Parameters:
    -----------
    run: copick.Run
    voxel_spacing: float
    name: str
    user_id: str
    session_id: str
    raise_error: bool - Raise a ValueError instead of returning None on failure.
    verbose: bool - Print the failure message when not raising.

    Returns:
    --------
    seg: np.ndarray - The segmentation, or None if it could not be read and raise_error is False.

    Raises:
    -------
    ValueError - If the segmentation could not be read and raise_error is True.
    """

    # Construct the Target URI
    if session_id is None and user_id is None:
        uri = f"{name}@{voxel_spacing}"
    elif session_id is None:
        uri = f"{name}:{user_id}@{voxel_spacing}"
    else:
        # A session without a user must not be rendered as the literal "None".
        user_part = "*" if user_id is None else user_id
        uri = f"{name}:{user_part}/{session_id}@{voxel_spacing}"

    # Try to resolve the segmentation using the Copick URI
    try:
        segs = resolve_copick_objects(uri, run.root, "segmentation", run_name=run.name)
        return segs[0].numpy()
    except Exception as err:
        # Force the voxel spacing to be a float
        voxel_spacing = float(voxel_spacing)

        # Get all available segmentations with their metadata
        available_segs = run.get_segmentations(voxel_size=voxel_spacing)

        if len(available_segs) == 0:
            # Nothing at this spacing: list the spacings that do have segmentations.
            available_segs = run.get_segmentations()
            spacings = ", ".join(str(s.voxel_size) for s in available_segs)
            message = (
                f"No segmentation found for URI: {uri}\n"
                f"Segmentations are available with the following voxel sizes: {spacings}"
            )
        else:
            # Format the information for display
            seg_details = [
                f"(name: {s.name}, user_id: {s.user_id}, session_id: {s.session_id})" for s in available_segs
            ]
            message = (
                f"\nNo segmentation at {voxel_spacing} A found matching:\n"
                f"  name: {name}, user_id: {user_id}, session_id: {session_id}\n"
                f"Available segmentations in {run.name} are:\n  " + "\n  ".join(seg_details)
            )

        if raise_error:
            raise ValueError(message) from err
        if verbose:
            print(message)
        return None
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def coordinates(
    run,  # CoPick run object containing the picks
    name: str,  # Name of the object or protein for which coordinates are being extracted
    user_id: str,  # Identifier of the user that generated the picks
    session_id: str = None,  # Identifier of the session that generated the picks
    voxel_size: float = 10,  # Voxel size of the tomogram, used for scaling the coordinates
    raise_error: bool = False,
    verbose: bool = True,
):
    """
    Reads the coordinates of the picks from a Copick run.

    Parameters:
    -----------
    run: copick.Run - Run that is queried through ``run.get_picks``.
    name: str - Object name passed to ``get_picks`` as ``object_name``.
    user_id: str - User ID passed to ``get_picks``.
    session_id: str - Session ID passed to ``get_picks`` (may be None).
    voxel_size: float - Divisor applied to every pick location component.
    raise_error: bool - If True, raise when no picks match the query.
    verbose: bool - If True, print a message when no picks match (and
        ``raise_error`` is False) or when more than one pick set matches.

    Returns:
    --------
    coordinates: np.ndarray - Array of shape (n_points, 3) holding the
        (z, y, x) location of each point divided by ``voxel_size``, taken
        from the first matching pick set. ``None`` when nothing matches and
        ``raise_error`` is False.

    Raises:
    -------
    ValueError - If no picks match the query and ``raise_error`` is True.
    """

    def _describe(pick_sets):
        # One human-readable line per pick set, used in the diagnostics below.
        return [
            f"(name: {p.pickable_object_name}, user_id: {p.user_id}, session_id: {p.session_id})" for p in pick_sets
        ]

    # Retrieve the pick sets associated with the specified object, user and session
    picks = run.get_picks(object_name=name, user_id=user_id, session_id=session_id)

    if len(picks) == 0:
        # Nothing matched: list every pick set in the run so the caller can
        # see which (name, user_id, session_id) combinations do exist.
        message = (
            "\nNo picks found matching:\n"
            f" name: {name}, user_id: {user_id}, session_id: {session_id}\n"
            "Available picks are:\n " + "\n ".join(_describe(run.get_picks()))
        )
        if raise_error:
            raise ValueError(message)
        if verbose:
            print(message)
        return None

    if len(picks) > 1 and verbose:
        # The query is ambiguous; report the candidates and which one is used.
        print(
            "[Warning] More than 1 pick is available for the query information."
            "\nAvailable picks are:\n " + "\n ".join(_describe(picks)) + "\n"
            f"Defaulting to loading:\n {picks[0]}\n",
        )

    # Only the first matching pick set is loaded.
    points = picks[0].points

    # Convert each point location to (z, y, x) scaled by the voxel size.
    # reshape(-1, 3) keeps the (0, 3) shape when the pick set has no points.
    return np.array(
        [
            (
                point.location.z / voxel_size,
                point.location.y / voxel_size,
                point.location.x / voxel_size,
            )
            for point in points
        ],
        dtype=float,
    ).reshape(-1, 3)
|