copick-utils 1.0.2__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. {copick_utils-1.0.2 → copick_utils-1.2.0}/.github/workflows/conventional-commits.yml +1 -1
  2. copick_utils-1.2.0/.release-please.manifest.json +3 -0
  3. {copick_utils-1.0.2 → copick_utils-1.2.0}/CHANGELOG.md +24 -0
  4. {copick_utils-1.0.2 → copick_utils-1.2.0}/PKG-INFO +2 -1
  5. {copick_utils-1.0.2 → copick_utils-1.2.0}/pyproject.toml +5 -0
  6. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/__init__.py +1 -1
  7. copick_utils-1.2.0/src/copick_utils/cli/download.py +34 -0
  8. copick_utils-1.2.0/src/copick_utils/cli/download_commands.py +11 -0
  9. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/processing_commands.py +2 -0
  10. copick_utils-1.2.0/src/copick_utils/cli/split_labels.py +148 -0
  11. copick_utils-1.2.0/src/copick_utils/io/portal.py +149 -0
  12. copick_utils-1.2.0/src/copick_utils/io/readers.py +200 -0
  13. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/io/writers.py +9 -9
  14. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/__init__.py +0 -4
  15. copick_utils-1.2.0/src/copick_utils/process/split_labels.py +214 -0
  16. copick_utils-1.0.2/.release-please.manifest.json +0 -3
  17. copick_utils-1.0.2/src/copick_utils/io/readers.py +0 -135
  18. {copick_utils-1.0.2 → copick_utils-1.2.0}/.github/dependabot.yml +0 -0
  19. {copick_utils-1.0.2 → copick_utils-1.2.0}/.github/workflows/py-formatting.yml +0 -0
  20. {copick_utils-1.0.2 → copick_utils-1.2.0}/.github/workflows/release-please.yml +0 -0
  21. {copick_utils-1.0.2 → copick_utils-1.2.0}/.gitignore +0 -0
  22. {copick_utils-1.0.2 → copick_utils-1.2.0}/.pre-commit-config.yaml +0 -0
  23. {copick_utils-1.0.2 → copick_utils-1.2.0}/LICENSE +0 -0
  24. {copick_utils-1.0.2 → copick_utils-1.2.0}/README.md +0 -0
  25. {copick_utils-1.0.2 → copick_utils-1.2.0}/SECURITY.md +0 -0
  26. {copick_utils-1.0.2 → copick_utils-1.2.0}/examples/segmentation_example.ipynb +0 -0
  27. {copick_utils-1.0.2 → copick_utils-1.2.0}/release-please.config.json +0 -0
  28. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/__init__.py +0 -0
  29. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/clipmesh.py +0 -0
  30. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/clippicks.py +0 -0
  31. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/clipseg.py +0 -0
  32. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/conversion_commands.py +0 -0
  33. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/enclosed.py +0 -0
  34. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/filter_components.py +0 -0
  35. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/fit_spline.py +0 -0
  36. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/hull.py +0 -0
  37. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/input_output_selection.py +0 -0
  38. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/logical_commands.py +0 -0
  39. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/mesh2picks.py +0 -0
  40. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/mesh2seg.py +0 -0
  41. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/meshop.py +0 -0
  42. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picks2ellipsoid.py +0 -0
  43. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picks2mesh.py +0 -0
  44. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picks2plane.py +0 -0
  45. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picks2seg.py +0 -0
  46. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picks2sphere.py +0 -0
  47. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picks2surface.py +0 -0
  48. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picksin.py +0 -0
  49. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/picksout.py +0 -0
  50. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/seg2mesh.py +0 -0
  51. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/seg2picks.py +0 -0
  52. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/segop.py +0 -0
  53. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/separate_components.py +0 -0
  54. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/skeletonize.py +0 -0
  55. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/util.py +0 -0
  56. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/cli/validbox.py +0 -0
  57. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/__init__.py +0 -0
  58. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/converter_common.py +0 -0
  59. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/ellipsoid_from_picks.py +0 -0
  60. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/lazy_converter.py +0 -0
  61. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/mesh_from_picks.py +0 -0
  62. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/mesh_from_segmentation.py +0 -0
  63. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/picks_from_mesh.py +0 -0
  64. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/picks_from_segmentation.py +0 -0
  65. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/plane_from_picks.py +0 -0
  66. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/segmentation_from_mesh.py +0 -0
  67. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/segmentation_from_picks.py +0 -0
  68. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/sphere_from_picks.py +0 -0
  69. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/converters/surface_from_picks.py +0 -0
  70. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/features/__init__.py +0 -0
  71. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/features/skimage.py +0 -0
  72. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/io/__init__.py +0 -0
  73. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/logical/__init__.py +0 -0
  74. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/logical/distance_operations.py +0 -0
  75. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/logical/enclosed_operations.py +0 -0
  76. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/logical/mesh_operations.py +0 -0
  77. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/logical/point_operations.py +0 -0
  78. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/logical/segmentation_operations.py +0 -0
  79. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/pickers/__init__.py +0 -0
  80. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/pickers/grid_picker.py +0 -0
  81. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/connected_components.py +0 -0
  82. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/filter_components.py +0 -0
  83. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/hull.py +0 -0
  84. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/skeletonize.py +0 -0
  85. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/spline_fitting.py +0 -0
  86. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/process/validbox.py +0 -0
  87. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/util/__init__.py +0 -0
  88. {copick_utils-1.0.2 → copick_utils-1.2.0}/src/copick_utils/util/config_models.py +0 -0
  89. {copick_utils-1.0.2 → copick_utils-1.2.0}/tests/__init__.py +0 -0
@@ -16,4 +16,4 @@ jobs:
16
16
  conventional_commit_title:
17
17
  runs-on: ubuntu-latest
18
18
  steps:
19
- - uses: chanzuckerberg/github-actions/.github/actions/conventional-commits@v6.12.2
19
+ - uses: chanzuckerberg/github-actions/.github/actions/conventional-commits@v6.13.0
@@ -0,0 +1,3 @@
1
+ {
2
+ ".": "1.2.0"
3
+ }
@@ -1,5 +1,29 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.2.0](https://github.com/copick/copick-utils/compare/copick-utils-v1.1.0...copick-utils-v1.2.0) (2026-01-31)
4
+
5
+
6
+ ### ✨ Features
7
+
8
+ * add parser uri to readers, add function to download portal project fo… ([#43](https://github.com/copick/copick-utils/issues/43)) ([7743ee5](https://github.com/copick/copick-utils/commit/7743ee5facb6dff037f8348852c6edcd105abe25))
9
+
10
+
11
+ ### 🐞 Bug Fixes
12
+
13
+ * URI in copick readers ([#45](https://github.com/copick/copick-utils/issues/45)) ([926f01d](https://github.com/copick/copick-utils/commit/926f01dfccb537bca90fd30ee59ceacb83d7f947))
14
+
15
+ ## [1.1.0](https://github.com/copick/copick-utils/compare/copick-utils-v1.0.2...copick-utils-v1.1.0) (2026-01-26)
16
+
17
+
18
+ ### ✨ Features
19
+
20
+ * Add split command. ([#41](https://github.com/copick/copick-utils/issues/41)) ([479bf48](https://github.com/copick/copick-utils/commit/479bf48f9a50e3eb4066b3a41f2b399f110b2553))
21
+
22
+
23
+ ### 🐞 Bug Fixes
24
+
25
+ * bump chanzuckerberg/github-actions from 6.12.2 to 6.13.0 ([#40](https://github.com/copick/copick-utils/issues/40)) ([68b21c1](https://github.com/copick/copick-utils/commit/68b21c175e2ca96946ceba9252b86c17e3382fe2))
26
+
3
27
  ## [1.0.2](https://github.com/copick/copick-utils/compare/copick-utils-v1.0.1...copick-utils-v1.0.2) (2026-01-06)
4
28
 
5
29
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: copick-utils
3
- Version: 1.0.2
3
+ Version: 1.2.0
4
4
  Summary: Utilities for copick
5
5
  Project-URL: Repository, https://github.com/KyleHarrington/copick-utils.git
6
6
  Project-URL: Issues, https://github.com/KyleHarrington/copick-utils/issues
@@ -33,6 +33,7 @@ Requires-Dist: click-option-group
33
33
  Requires-Dist: copick>=1.16.0
34
34
  Requires-Dist: manifold3d
35
35
  Requires-Dist: mapbox-earcut
36
+ Requires-Dist: mdocfile
36
37
  Requires-Dist: numpy
37
38
  Requires-Dist: rtree
38
39
  Requires-Dist: scikit-image
@@ -29,6 +29,7 @@ dependencies = [
29
29
  "trimesh",
30
30
  "manifold3d",
31
31
  "mapbox-earcut",
32
+ "mdocfile",
32
33
  "tqdm",
33
34
  "scikit-learn",
34
35
  "shapely",
@@ -76,6 +77,7 @@ skeletonize = "copick_utils.cli.processing_commands:skeletonize"
76
77
  fit_spline = "copick_utils.cli.processing_commands:fit_spline"
77
78
  validbox = "copick_utils.cli.processing_commands:validbox"
78
79
  hull = "copick_utils.cli.processing_commands:hull"
80
+ split = "copick_utils.cli.processing_commands:split"
79
81
 
80
82
  [project.entry-points."copick.logical.commands"]
81
83
  meshop = "copick_utils.cli.logical_commands:meshop"
@@ -87,6 +89,9 @@ clippicks = "copick_utils.cli.logical_commands:clippicks"
87
89
  picksin = "copick_utils.cli.logical_commands:picksin"
88
90
  picksout = "copick_utils.cli.logical_commands:picksout"
89
91
 
92
+ [project.entry-points."copick.download.commands"]
93
+ project = "copick_utils.cli.download_commands:project"
94
+
90
95
  [tool.hatch.version]
91
96
  path = "src/copick_utils/__init__.py"
92
97
 
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2024-present Kyle Harrington <czi@kyleharrington.com>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "1.0.2"
4
+ __version__ = "1.2.0"
@@ -0,0 +1,34 @@
1
+ import click
2
+
3
+
4
@click.command(
    context_settings={"show_default": True},
    short_help="Download tilt series and alignments from the CryoET Data Portal.",
    no_args_is_help=True,
)
@click.option(
    "-ds",
    "--dataset",
    required=True,
    # Dataset IDs are integers (the portal helper is annotated ``dataset_id: int``);
    # letting click convert the value rejects non-numeric input with a usage error.
    type=int,
    help="Dataset ID to download from the CryoET Data Portal.",
)
@click.option(
    "-o",
    "--output",
    # No ``required=True`` here: an option that has a default can never be "missing",
    # so the two settings contradicted each other.
    default=".",
    type=str,
    help="Output directory to save the downloaded files.",
)
def project(dataset: int, output: str):
    """
    Download tilt series and alignments from the CryoET Data Portal for sub-tomogram averaging with py2rely.
    """
    # NOTE: the docstring above is rendered by click as the command's --help text.
    download_project(dataset, output)
29
+
30
+
31
def download_project(dataset: str, output: str):
    """Download the AreTomo-related files of one portal dataset.

    Parameters:
    -----------
    dataset: Dataset ID on the CryoET Data Portal.
    output: Directory the downloaded files are written to.
    """
    # Imported inside the function so that the portal module (and the client it
    # creates at import time) is only loaded when the command actually runs.
    from copick_utils.io import portal as portal_io

    portal_io.download_aretomo_files(dataset, output)
@@ -0,0 +1,11 @@
1
+ """CLI commands for downloading data from the CryoET Data Portal.
2
+
3
+ This module imports all download commands from specialized files for better organization.
4
+ """
5
+
6
+ from copick_utils.cli.download import project
7
+
8
+ # All commands are now available for import by the main CLI
9
+ __all__ = [
10
+ "project",
11
+ ]
@@ -5,6 +5,7 @@ from copick_utils.cli.fit_spline import fit_spline
5
5
  from copick_utils.cli.hull import hull
6
6
  from copick_utils.cli.separate_components import separate_components
7
7
  from copick_utils.cli.skeletonize import skeletonize
8
+ from copick_utils.cli.split_labels import split
8
9
  from copick_utils.cli.validbox import validbox
9
10
 
10
11
  # All commands are now available for import by the main CLI
@@ -15,4 +16,5 @@ __all__ = [
15
16
  "separate_components",
16
17
  "filter_components",
17
18
  "fit_spline",
19
+ "split",
18
20
  ]
@@ -0,0 +1,148 @@
1
+ """CLI command for splitting multilabel segmentations into individual single-class segmentations."""
2
+
3
+ import click
4
+ import copick
5
+ from click_option_group import optgroup
6
+ from copick.cli.util import add_config_option, add_debug_option
7
+ from copick.util.log import get_logger
8
+ from copick.util.uri import parse_copick_uri
9
+
10
+ from copick_utils.cli.util import add_input_option, add_workers_option
11
+
12
+
13
@click.command(
    context_settings={"show_default": True},
    short_help="Split multilabel segmentations into single-class segmentations.",
    no_args_is_help=True,
)
@add_config_option
@optgroup.group("\nInput Options", help="Options related to the input segmentation.")
@optgroup.option(
    "--run-names",
    "-r",
    multiple=True,
    help="Specific run names to process (default: all runs).",
)
@add_input_option("segmentation")
@optgroup.group("\nTool Options", help="Options related to this tool.")
@add_workers_option
@optgroup.group("\nOutput Options", help="Options related to output segmentations.")
@optgroup.option(
    "--output-user-id",
    type=str,
    default="split",
    help="User ID for output segmentations.",
)
@add_debug_option
def split(
    config,
    run_names,
    input_uri,
    workers,
    output_user_id,
    debug,
):
    """
    Split multilabel segmentations into individual single-class binary segmentations.

    This command takes a multilabel segmentation and creates separate binary segmentations
    for each label value. Each output segmentation is named after the corresponding
    PickableObject (as defined in the copick config) and uses the same session ID as
    the input.

    \b
    URI Format:
        Segmentations: name:user_id/session_id@voxel_spacing

    \b
    Label-to-Object Mapping:
        The tool looks up each label value in the pickable_objects configuration
        and uses the object name for the output segmentation:
        - Label 1 (ribosome) → ribosome:split/session-001@10.0
        - Label 2 (membrane) → membrane:split/session-001@10.0
        - Label 3 (proteasome) → proteasome:split/session-001@10.0

    \b
    Examples:
        # Split multilabel segmentation (outputs named by pickable objects)
        copick process split -i "predictions:model/run-001@10.0"

        # Split with custom output user ID
        copick process split -i "classes:annotator/manual@10.0" --output-user-id "per-class"

        # Process specific runs only
        copick process split -i "labels:*/*@10.0" --run-names TS_001 --run-names TS_002
    """

    logger = get_logger(__name__, debug=debug)

    root = copick.from_file(config)
    # An empty tuple from click means "no filter"; the batch function receives None then.
    run_names_list = list(run_names) if run_names else None

    # Parse input URI
    try:
        input_params = parse_copick_uri(input_uri, "segmentation")
    except ValueError as e:
        raise click.BadParameter(f"Invalid input URI: {e}") from e

    segmentation_name = input_params["name"]
    segmentation_user_id = input_params["user_id"]
    segmentation_session_id = input_params["session_id"]
    voxel_spacing = input_params.get("voxel_spacing")

    if voxel_spacing is None or voxel_spacing == "*":
        raise click.BadParameter("Input URI must include a specific voxel spacing (e.g., @10.0)")

    # Convert up front so a non-numeric spacing is reported as a usage error
    # instead of an unhandled ValueError at the call site below.
    try:
        voxel_spacing_value = float(voxel_spacing)
    except (TypeError, ValueError) as e:
        raise click.BadParameter(
            f"Input URI must include a numeric voxel spacing (e.g., @10.0), got '{voxel_spacing}'",
        ) from e

    # Splitting needs one exact segmentation: reject wildcards and (defensively)
    # fields the parser left unset, which would otherwise fail the ``in`` test.
    critical_fields = (segmentation_name, segmentation_user_id, segmentation_session_id)
    if any(field is None or "*" in field for field in critical_fields):
        raise click.BadParameter(
            "Input URI cannot contain wildcards for splitting. "
            "Please specify exact segmentation name, user_id, and session_id.",
        )

    logger.info(f"Splitting multilabel segmentation '{segmentation_name}'")
    logger.debug(f"Input: {segmentation_user_id}/{segmentation_session_id} @ {voxel_spacing}Å")
    logger.debug(f"Output user ID: {output_user_id}")
    logger.debug(f"Workers: {workers}")

    # Imported here rather than at module level, as in the original implementation.
    from copick_utils.process.split_labels import split_labels_batch

    # Process runs
    results = split_labels_batch(
        root=root,
        segmentation_name=segmentation_name,
        segmentation_user_id=segmentation_user_id,
        segmentation_session_id=segmentation_session_id,
        voxel_spacing=voxel_spacing_value,
        output_user_id=output_user_id,
        run_names=run_names_list,
        workers=workers,
    )

    # Aggregate the per-run result dicts in a single pass; falsy entries are skipped.
    successful = 0
    total_labels = 0
    all_object_names = set()
    all_errors = []
    for result in results.values():
        if not result:
            continue
        if result.get("processed", 0) > 0:
            successful += 1
        total_labels += result.get("labels_split", 0)
        all_object_names.update(result.get("object_names") or ())
        all_errors.extend(result.get("errors") or ())

    logger.info(f"Completed: {successful}/{len(results)} runs processed successfully")
    logger.info(f"Total labels split: {total_labels}")
    logger.info(f"Object names created: {', '.join(sorted(all_object_names))}")

    if all_errors:
        logger.warning(f"Encountered {len(all_errors)} errors during processing")
        for error in all_errors[:5]:  # Show first 5 errors
            logger.warning(f" - {error}")
        if len(all_errors) > 5:
            logger.warning(f" ... and {len(all_errors) - 5} more errors")
@@ -0,0 +1,149 @@
1
+ """
2
+ A minimal example using minimal libraries / imports to download relevant AreTomo files
3
+ from the CryoET Data Portal. Downloads the corresponding files, using the run ID as the
4
+ base filename.
5
+
6
+ Original implementation by Daniel Ji and Utz Ermel.
7
+ """
8
+ import multiprocessing
9
+ import os
10
+
11
+ import cryoet_data_portal as cdp
12
+ import mdocfile
13
+ import numpy as np
14
+ import pandas as pd
15
+ import requests
16
+ import s3fs
17
+
18
+ global_client = cdp.Client()
19
+
20
+
21
def download_aretomo_files(dataset_id: int, output_dir: str, num_processes: int = 8):
    """
    Download the AreTomo files of every tiltseries in a CryoET Data Portal dataset.

    Parameters:
    -----------
    dataset_id: int - Portal dataset ID (numeric strings, e.g. from a CLI, are converted).
    output_dir: str - Directory to write the files to; created if it does not exist.
    num_processes: int - Number of worker processes downloading in parallel.

    Raises:
    -------
    ValueError - If ``dataset_id`` is not numeric or no dataset with that ID is found.
    """
    dataset_id = int(dataset_id)
    # Ensure the destination exists before any worker tries to write into it.
    os.makedirs(output_dir, exist_ok=True)

    print(f"Fetching tiltseries for dataset id {dataset_id}...", flush=True)
    dataset = cdp.Dataset.get_by_id(global_client, dataset_id)
    if dataset is None:
        raise ValueError(f"No dataset found for dataset id {dataset_id}")

    # a bit slow for some reason, can take some time
    # Each job carries (run id, tiltseries id); the run is the one that owns the tiltseries.
    run_and_tiltseries_ids = [(run.id, tiltseries.id) for run in dataset.runs for tiltseries in run.tiltseries]
    print(
        f"Found {len(run_and_tiltseries_ids)} tiltseries for dataset id {dataset_id}. Starting downloads...",
        flush=True,
    )
    if not run_and_tiltseries_ids:
        return  # nothing to download, so do not spin up a process pool

    # The worker's ``run_name`` slot receives the run id here; because a tiltseries id
    # is always supplied, the worker only uses that value as a log prefix.
    jobs = [(dataset_id, run_id, output_dir, tiltseries_id) for run_id, tiltseries_id in run_and_tiltseries_ids]
    with multiprocessing.Pool(processes=num_processes) as pool:
        # Drain the iterator so every job runs; an exception raised in a worker is
        # re-raised here when its result is reached.
        for _ in pool.imap_unordered(_worker_download_aretomo_files_for_tiltseries, jobs):
            pass
40
+
41
+
42
def _worker_download_aretomo_files_for_tiltseries(args):
    """Pool entry point: unpack one job tuple and download that tiltseries.

    ``args`` is a 4-tuple ``(dataset_id, run_name, output_dir, tiltseries_id)``;
    a single-argument wrapper is needed because ``imap_unordered`` passes one item per call.
    """
    ds_id, run_label, out_dir, ts_id = args
    download_aretomo_files_for_tiltseries(
        dataset_id=ds_id,
        run_name=run_label,
        output_dir=out_dir,
        tiltseries_id=ts_id,
    )
45
+
46
+
47
# note: this function assumes that there is only one tiltseries per run
# note: the tiltseries name is equivalent to the run name
# if tiltseries_id is provided, it is prioritized over dataset_id + run_name
def download_aretomo_files_for_tiltseries(dataset_id: int, run_name: str, output_dir: str, tiltseries_id: int = None):
    """
    Download the AreTomo-related files of a single tiltseries, named after its run id.

    Written to ``output_dir``: the ``*.tlt`` / ``*.rawtlt`` files, one CTF text file,
    one ``*.aln`` alignment file, one ``*.mdoc`` file, the tiltseries ``.mrc`` and an
    ``<run id>_Imod/<run id>_order_list.csv`` derived from the mdoc tilt angles.

    Parameters:
    -----------
    dataset_id: int - Dataset ID, used with ``run_name`` when no tiltseries id is given.
    run_name: str - Run name used for the lookup and as the prefix of the first log line.
    output_dir: str - Destination directory; created if it does not exist.
    tiltseries_id: int - Optional tiltseries ID; takes priority over dataset_id + run_name.

    Raises:
    -------
    ValueError - If the tiltseries cannot be identified uniquely, or if no ``.aln``
                 file, frames or ``.mdoc`` file can be found for it.
    requests.HTTPError - If the tiltseries ``.mrc`` download returns an error status.
    """
    print(f"[{run_name}] Downloading AreTomo files for tiltseries id {tiltseries_id}...", flush=True)

    # The .mrc is written with a plain open(), which does not create directories.
    os.makedirs(output_dir, exist_ok=True)

    client = cdp.Client()
    s3 = s3fs.S3FileSystem(anon=True)
    if not tiltseries_id:
        all_tiltseries = cdp.TiltSeries.find(
            client,
            query_filters=[cdp.TiltSeries.run.dataset_id == dataset_id, cdp.TiltSeries.run.name == run_name],
        )
        if len(all_tiltseries) == 0:
            raise ValueError(f"No tiltseries found for dataset_id {dataset_id} and run_name {run_name}")
        if len(all_tiltseries) > 1:
            raise ValueError(f"Multiple tiltseries found for dataset_id {dataset_id} and run_name {run_name}")
        tiltseries = all_tiltseries[0]
    else:
        tiltseries = cdp.TiltSeries.get_by_id(client, tiltseries_id)
        if tiltseries is None:
            raise ValueError(f"No tiltseries found for tiltseries id {tiltseries_id}")

    run = tiltseries.run
    run_id = run.id

    # get the s3 folder path and then glob for *.tlt / *.rawtlt files to download them,
    # renaming the base to match the run id
    s3_folder_path = tiltseries.s3_mrc_file.rsplit("/", 1)[0] + "/"
    tlt_files = s3.glob(s3_folder_path + "*.tlt") + s3.glob(s3_folder_path + "*.rawtlt")
    for tlt_file in tlt_files:
        base_name = os.path.basename(tlt_file)
        ext = os.path.splitext(base_name)[1]
        # The destination depends only on the extension, so a second match with the
        # same extension overwrites the first.
        dest_file = os.path.join(output_dir, f"{run_id}{ext}")
        s3.get(tlt_file, dest_file)
        print(f"[{run_id}] Downloaded {base_name} as {os.path.basename(dest_file)}.", flush=True)

    # do the same for "*CTF*.txt" files and "*ctf*.txt" files
    ctf_files = s3.glob(s3_folder_path + "*CTF*.txt") + s3.glob(s3_folder_path + "*ctf*.txt")
    if not ctf_files:
        print(f"WARNING: No CTF files found for tiltseries id {tiltseries.id}")
    else:
        ctf_file = ctf_files[0]
        base_name = os.path.basename(ctf_file)
        if len(ctf_files) > 1:
            print(f"WARNING: Multiple CTF files found for tiltseries id {tiltseries.id}, using {base_name}")
        dest_file = os.path.join(output_dir, f"{run_id}_CTF.txt")
        s3.get(ctf_file, dest_file)
        print(f"[{run_id}] Downloaded {base_name} as {os.path.basename(dest_file)}.", flush=True)

    # now find the corresponding alignment for this tiltseries and download the "*.aln" file
    alignments = tiltseries.alignments
    if len(alignments) == 0:
        print(f"WARNING: No alignments found for tiltseries id {tiltseries.id}")
    elif len(alignments) > 1:
        # Ambiguous: no alignment file is downloaded in this case.
        print(f"WARNING: Multiple alignments found for tiltseries id {tiltseries.id}")
    else:
        alignment = alignments[0]
        s3_alignment_folder_path = alignment.s3_alignment_metadata.rsplit("/", 1)[0] + "/"
        aln_files = s3.glob(s3_alignment_folder_path + "*.aln")
        if not aln_files:
            raise ValueError(f"No .aln files found for run name {run.name} and alignment id {alignment.id}")
        aln_file = aln_files[0]
        base_name = os.path.basename(aln_file)
        if len(aln_files) > 1:
            print(f"WARNING: Multiple .aln files found for run name {run.name}, using {base_name}")
        ext = os.path.splitext(base_name)[1]
        dest_file = os.path.join(output_dir, f"{run_id}{ext}")
        s3.get(aln_file, dest_file)
        print(f"[{run_id}] Downloaded {base_name} as {os.path.basename(dest_file)}.", flush=True)

    # now get the mdoc file from the Frames/ folder
    frames = run.frames
    if len(frames) == 0:
        raise ValueError(f"No frames found for run name {run.name}")
    # Only the first frame is needed: it locates the folder that is searched for *.mdoc.
    s3_frames_folder_path = frames[0].s3_frame_path.rsplit("/", 1)[0] + "/"
    mdoc_files = s3.glob(s3_frames_folder_path + "*.mdoc")
    if not mdoc_files:
        raise ValueError(f"No .mdoc files found for run name {run.name}")
    mdoc_file = mdoc_files[0]
    base_name = os.path.basename(mdoc_file)
    if len(mdoc_files) > 1:
        print(f"WARNING: Multiple .mdoc files found for run name {run.name}, using {base_name}")
    ext = os.path.splitext(base_name)[1]
    mdoc_path = os.path.join(output_dir, f"{run_id}{ext}")
    s3.get(mdoc_file, mdoc_path)
    print(f"[{run_id}] Downloaded {base_name} as {os.path.basename(mdoc_path)}.", flush=True)

    # download tiltseries mrc file
    tiltseries_file = os.path.join(output_dir, f"{run_id}.mrc")
    tiltseries_url = tiltseries.https_mrc_file
    # The context manager releases the streamed connection even if writing fails;
    # the (connect, read) timeout keeps a stalled transfer from hanging the worker.
    with requests.get(tiltseries_url, stream=True, timeout=(10, 60)) as response:
        response.raise_for_status()
        with open(tiltseries_file, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
    print(f"[{run_id}] Downloaded tiltseries mrc file as {os.path.basename(tiltseries_file)}.", flush=True)

    # create imod file for order list, read back from the mdoc file saved above
    mdoc = mdocfile.read(mdoc_path)
    order_list = mdoc["TiltAngle"]
    imodpath = os.path.join(output_dir, f"{run_id}_Imod")
    os.makedirs(imodpath, exist_ok=True)
    number = np.arange(len(order_list)) + 1  # 1-based image numbers in mdoc order

    # save in csv with 'ImageNumber', 'TiltAngle' headers
    df = pd.DataFrame({"ImageNumber": number, "TiltAngle": order_list})
    df.to_csv(os.path.join(imodpath, f"{run_id}_order_list.csv"), index=False)
@@ -0,0 +1,200 @@
1
+ import numpy as np
2
+ from copick.util.uri import resolve_copick_objects
3
+
4
+
5
def tomogram(run, voxel_size: float = 10, algorithm: str = "wbp", raise_error: bool = False, verbose=True):
    """
    Reads a tomogram from a Copick run.

    The tomogram is resolved through the Copick URI ``{algorithm}@{voxel_size}``.
    If that fails, the run is inspected to report what is available instead.

    Parameters:
    -----------
    run: copick.Run
    voxel_size: float - Voxel spacing of the requested tomogram.
    algorithm: str - Tomogram type (reconstruction algorithm) to load.
    raise_error: bool - Raise ValueError on any failure instead of returning None.
    verbose: bool - Print the diagnostic message when not raising.

    Returns:
    --------
    vol: np.ndarray - The tomogram, or None if it could not be loaded.

    Raises:
    -------
    ValueError - If loading fails and ``raise_error`` is True.
    """
    uri = f"{algorithm}@{voxel_size}"

    try:
        vol = resolve_copick_objects(uri, run.root, "tomogram", run_name=run.name)
        return vol[0].numpy()
    except Exception as err:  # Work out which object is missing and report it
        voxel_spacing_obj = run.get_voxel_spacing(voxel_size)

        if voxel_spacing_obj is None:
            # The voxel spacing itself is absent: list the spacings the run has
            available_voxel_spacings = [vs.voxel_size for vs in run.voxel_spacings]
            message = (
                f"[Warning] No tomogram found for {run.name} with uri: {uri}\n"
                f"Available voxel sizes are: {', '.join(map(str, available_voxel_spacings))}"
            )
        elif voxel_spacing_obj.get_tomogram(algorithm) is None:
            # The spacing exists but not this algorithm: list the algorithms it has
            available_algorithms = [tomo.tomo_type for tomo in voxel_spacing_obj.tomograms]
            message = (
                f"[Warning] No tomogram found for {run.name} with uri: {uri}\n"
                f"Available algorithms @{voxel_size}A are: {', '.join(available_algorithms)}"
            )
        else:
            # The tomogram exists, so the failure happened while resolving or reading
            # it. Report that error rather than silently returning None.
            message = (
                f"[Warning] Failed to load tomogram for {run.name} with uri: {uri}\n"
                f"{type(err).__name__}: {err}"
            )

        if raise_error:
            raise ValueError(message) from err
        if verbose:
            print(message)
        return None
60
+
61
+
62
def segmentation(run, voxel_spacing: float, name: str, user_id=None, session_id=None, raise_error=False, verbose=True):
    """
    Reads a segmentation from a Copick run.

    The segmentation is resolved through a Copick URI built from the arguments
    (``name[:user_id[/session_id]]@voxel_spacing``). If that fails, the
    segmentations available in the run are reported instead.

    Parameters:
    -----------
    run: copick.Run
    voxel_spacing: float
    name: str
    user_id: str - Optional; when only ``session_id`` is given, any user is matched.
    session_id: str - Optional.
    raise_error: bool - Raise ValueError on failure instead of returning None.
    verbose: bool - Print the diagnostic message when not raising.

    Returns:
    --------
    seg: np.ndarray - The segmentation, or None if it could not be loaded.

    Raises:
    -------
    ValueError - If loading fails and ``raise_error`` is True.
    """

    # Construct the Target URI
    if session_id is None and user_id is None:
        uri = f"{name}@{voxel_spacing}"
    elif session_id is None:
        uri = f"{name}:{user_id}@{voxel_spacing}"
    else:
        # A session without a user used to yield the literal text "None" in the URI;
        # use the "*" wildcard so the session can match under any user.
        user_part = "*" if user_id is None else user_id
        uri = f"{name}:{user_part}/{session_id}@{voxel_spacing}"

    # Try to resolve the segmentation using the Copick URI
    try:
        segs = resolve_copick_objects(uri, run.root, "segmentation", run_name=run.name)
        return segs[0].numpy()
    except Exception as err:
        # Force the voxel spacing to be a float
        voxel_spacing = float(voxel_spacing)

        # Get all available segmentations at the requested voxel spacing
        available_segs = run.get_segmentations(voxel_size=voxel_spacing)

        if len(available_segs) == 0:
            # Nothing at this spacing: report the spacings that do have segmentations
            available_segs = run.get_segmentations()
            message = (
                f"No segmentation found for URI: {uri}\n"
                f"Available segmentations avaiable w/following voxel sizes: {', '.join(map(str, [s.voxel_size for s in available_segs]))}"
            )
        else:
            # Format the available (name, user, session) triples for display
            seg_details = [
                f"(name: {s.name}, user_id: {s.user_id}, session_id: {s.session_id})" for s in available_segs
            ]

            message = (
                f"\nNo segmentation at {voxel_spacing} A found matching:\n"
                f" name: {name}, user_id: {user_id}, session_id: {session_id}\n"
                f"Available segmentations in {run.name} are:\n " + "\n ".join(seg_details)
            )

        if raise_error:
            raise ValueError(message) from err
        if verbose:
            print(message)
        return None
122
+
123
+
124
def coordinates(
    run,  # CoPick run object holding the picks
    name: str,  # Name of the pickable object whose picks are read
    user_id: str,  # Identifier of the user that generated the picks
    session_id: str = None,  # Identifier of the session that generated the picks
    voxel_size: float = 10,  # Voxel size the pick locations are divided by
    raise_error: bool = False,
    verbose: bool = True,
):
    """
    Reads pick locations from a Copick run and returns them in voxel units.

    Parameters:
    -----------
    run: copick.Run
    name: str
    user_id: str
    session_id: str
    voxel_size: float
    raise_error: bool
    verbose: bool

    Returns:
    --------
    coordinates: np.ndarray - (N, 3) array of (z, y, x) pick locations divided by
                 ``voxel_size``, or None when no matching picks exist.
    """
    matches = run.get_picks(object_name=name, user_id=user_id, session_id=session_id)

    if len(matches) == 0:
        # Nothing matched: describe every pick set the run holds so the caller can adjust the query
        described = [
            f"(name: {p.pickable_object_name}, user_id: {p.user_id}, session_id: {p.session_id})"
            for p in run.get_picks()
        ]
        message = (
            f"\nNo picks found matching:\n"
            f" name: {name}, user_id: {user_id}, session_id: {session_id}\n"
            f"Available picks are:\n " + "\n ".join(described)
        )
        if raise_error:
            raise ValueError(message)
        if verbose:
            print(message)
        return None

    if len(matches) > 1:
        # Ambiguous query: describe the candidates; the first one is used below
        described = [
            f"(name: {p.pickable_object_name}, user_id: {p.user_id}, session_id: {p.session_id})" for p in matches
        ]
        if verbose:
            warning = "[Warning] More than 1 pick is available for the query information."
            warning += "\nAvailable picks are:\n " + "\n ".join(described)
            warning += f"\nDefaulting to loading:\n {matches[0]}\n"
            print(warning)

    chosen = matches[0].points

    # One (z, y, x) row per pick, each component divided by the voxel size
    result = np.zeros([len(chosen), 3])
    for row, point in enumerate(chosen):
        loc = point.location
        result[row,] = [loc.z / voxel_size, loc.y / voxel_size, loc.x / voxel_size]

    return result