rhapso-0.1.92-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Rhapso/__init__.py +1 -0
- Rhapso/data_prep/__init__.py +2 -0
- Rhapso/data_prep/n5_reader.py +188 -0
- Rhapso/data_prep/s3_big_stitcher_reader.py +55 -0
- Rhapso/data_prep/xml_to_dataframe.py +215 -0
- Rhapso/detection/__init__.py +5 -0
- Rhapso/detection/advanced_refinement.py +203 -0
- Rhapso/detection/difference_of_gaussian.py +324 -0
- Rhapso/detection/image_reader.py +117 -0
- Rhapso/detection/metadata_builder.py +130 -0
- Rhapso/detection/overlap_detection.py +327 -0
- Rhapso/detection/points_validation.py +49 -0
- Rhapso/detection/save_interest_points.py +265 -0
- Rhapso/detection/view_transform_models.py +67 -0
- Rhapso/fusion/__init__.py +0 -0
- Rhapso/fusion/affine_fusion/__init__.py +2 -0
- Rhapso/fusion/affine_fusion/blend.py +289 -0
- Rhapso/fusion/affine_fusion/fusion.py +601 -0
- Rhapso/fusion/affine_fusion/geometry.py +159 -0
- Rhapso/fusion/affine_fusion/io.py +546 -0
- Rhapso/fusion/affine_fusion/script_utils.py +111 -0
- Rhapso/fusion/affine_fusion/setup.py +4 -0
- Rhapso/fusion/affine_fusion_worker.py +234 -0
- Rhapso/fusion/multiscale/__init__.py +0 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/__init__.py +19 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/__init__.py +3 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/czi_to_zarr.py +698 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/zarr_writer.py +265 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/models.py +81 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/utils/__init__.py +3 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/utils/utils.py +526 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/zeiss_job.py +249 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/__init__.py +21 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/array_to_zarr.py +257 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/radial_correction.py +557 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/run_capsule.py +98 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/utils/__init__.py +3 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/utils/utils.py +266 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/worker.py +89 -0
- Rhapso/fusion/multiscale_worker.py +113 -0
- Rhapso/fusion/neuroglancer_link_gen/__init__.py +8 -0
- Rhapso/fusion/neuroglancer_link_gen/dispim_link.py +235 -0
- Rhapso/fusion/neuroglancer_link_gen/exaspim_link.py +127 -0
- Rhapso/fusion/neuroglancer_link_gen/hcr_link.py +368 -0
- Rhapso/fusion/neuroglancer_link_gen/iSPIM_top.py +47 -0
- Rhapso/fusion/neuroglancer_link_gen/link_utils.py +239 -0
- Rhapso/fusion/neuroglancer_link_gen/main.py +299 -0
- Rhapso/fusion/neuroglancer_link_gen/ng_layer.py +1434 -0
- Rhapso/fusion/neuroglancer_link_gen/ng_state.py +1123 -0
- Rhapso/fusion/neuroglancer_link_gen/parsers.py +336 -0
- Rhapso/fusion/neuroglancer_link_gen/raw_link.py +116 -0
- Rhapso/fusion/neuroglancer_link_gen/utils/__init__.py +4 -0
- Rhapso/fusion/neuroglancer_link_gen/utils/shader_utils.py +85 -0
- Rhapso/fusion/neuroglancer_link_gen/utils/transfer.py +43 -0
- Rhapso/fusion/neuroglancer_link_gen/utils/utils.py +303 -0
- Rhapso/fusion/neuroglancer_link_gen_worker.py +30 -0
- Rhapso/matching/__init__.py +0 -0
- Rhapso/matching/load_and_transform_points.py +458 -0
- Rhapso/matching/ransac_matching.py +544 -0
- Rhapso/matching/save_matches.py +120 -0
- Rhapso/matching/xml_parser.py +302 -0
- Rhapso/pipelines/__init__.py +0 -0
- Rhapso/pipelines/ray/__init__.py +0 -0
- Rhapso/pipelines/ray/aws/__init__.py +0 -0
- Rhapso/pipelines/ray/aws/alignment_pipeline.py +227 -0
- Rhapso/pipelines/ray/aws/config/__init__.py +0 -0
- Rhapso/pipelines/ray/evaluation.py +71 -0
- Rhapso/pipelines/ray/interest_point_detection.py +137 -0
- Rhapso/pipelines/ray/interest_point_matching.py +110 -0
- Rhapso/pipelines/ray/local/__init__.py +0 -0
- Rhapso/pipelines/ray/local/alignment_pipeline.py +167 -0
- Rhapso/pipelines/ray/matching_stats.py +104 -0
- Rhapso/pipelines/ray/param/__init__.py +0 -0
- Rhapso/pipelines/ray/solver.py +120 -0
- Rhapso/pipelines/ray/split_dataset.py +78 -0
- Rhapso/solver/__init__.py +0 -0
- Rhapso/solver/compute_tiles.py +562 -0
- Rhapso/solver/concatenate_models.py +116 -0
- Rhapso/solver/connected_graphs.py +111 -0
- Rhapso/solver/data_prep.py +181 -0
- Rhapso/solver/global_optimization.py +410 -0
- Rhapso/solver/model_and_tile_setup.py +109 -0
- Rhapso/solver/pre_align_tiles.py +323 -0
- Rhapso/solver/save_results.py +97 -0
- Rhapso/solver/view_transforms.py +75 -0
- Rhapso/solver/xml_to_dataframe_solver.py +213 -0
- Rhapso/split_dataset/__init__.py +0 -0
- Rhapso/split_dataset/compute_grid_rules.py +78 -0
- Rhapso/split_dataset/save_points.py +101 -0
- Rhapso/split_dataset/save_xml.py +377 -0
- Rhapso/split_dataset/split_images.py +537 -0
- Rhapso/split_dataset/xml_to_dataframe_split.py +219 -0
- rhapso-0.1.92.dist-info/METADATA +39 -0
- rhapso-0.1.92.dist-info/RECORD +101 -0
- rhapso-0.1.92.dist-info/WHEEL +5 -0
- rhapso-0.1.92.dist-info/licenses/LICENSE +21 -0
- rhapso-0.1.92.dist-info/top_level.txt +2 -0
- tests/__init__.py +1 -0
- tests/test_detection.py +17 -0
- tests/test_matching.py +21 -0
- tests/test_solving.py +21 -0
--- /dev/null
+++ Rhapso/fusion/multiscale/aind_hcr_data_transformation/zeiss_job.py
@@ -0,0 +1,249 @@
+"""Module to handle zeiss data compression"""
+
+import logging
+import os
+import shutil
+import sys
+from pathlib import Path
+from time import time
+from typing import Any, List, Optional
+
+from aind_data_transformation.core import GenericEtl, JobResponse, get_parser
+from numcodecs.blosc import Blosc
+
+from .compress.czi_to_zarr import (
+    czi_stack_zarr_writer,
+)
+from .models import (
+    CompressorName,
+    ZeissJobSettings,
+)
+from .utils import utils
+
+logging.basicConfig(level=os.getenv("LOG_LEVEL", "WARNING"))
+
+
+class ZeissCompressionJob(GenericEtl[ZeissJobSettings]):
+    """Job to handle compressing and uploading Zeiss data."""
+
+    @staticmethod
+    def partition_list(
+        lst: List[Any], num_of_partitions: int
+    ) -> List[List[Any]]:
+        """Partitions a list"""
+        accumulated_list = []
+        for _ in range(num_of_partitions):
+            accumulated_list.append([])
+        for list_item_index, list_item in enumerate(lst):
+            a_index = list_item_index % num_of_partitions
+            accumulated_list[a_index].append(list_item)
+        return accumulated_list
+
+    def _get_partitioned_list_of_stack_paths(self) -> List[List[Path]]:
+        """
+        Scans through the input source and partitions a list of stack
+        paths that it finds there.
+        """
+        all_stack_paths = []
+        total_counter = 0
+        for p in (
+            Path(self.job_settings.input_source).joinpath("SPIM").glob("*.czi")
+        ):
+            if p.is_file():
+                total_counter += 1
+                all_stack_paths.append(p)
+
+        # Important to sort paths so every node computes the same list
+        all_stack_paths.sort(key=lambda x: str(x))
+        return self.partition_list(
+            all_stack_paths, self.job_settings.num_of_partitions
+        )
+
+    @staticmethod
+    def _get_voxel_resolution(acquisition_path: Path) -> List[float]:
+        """Get the voxel resolution from an acquisition.json file."""
+
+        if not acquisition_path.is_file():
+            raise FileNotFoundError(
+                f"acquisition.json file not found at: {acquisition_path}"
+            )
+
+        acquisition_config = utils.read_json_as_dict(acquisition_path)
+
+        # Grabbing a tile with metadata from acquisition - we assume all
+        # dataset was acquired with the same resolution
+        tile_coord_transforms = acquisition_config["tiles"][0][
+            "coordinate_transformations"
+        ]
+
+        scale_transform = [
+            x["scale"] for x in tile_coord_transforms if x["type"] == "scale"
+        ][0]
+
+        x = float(scale_transform[0])
+        y = float(scale_transform[1])
+        z = float(scale_transform[2])
+
+        return [z, y, x]
+
+    def _get_compressor(self) -> Optional[Blosc]:
+        """
+        Utility method to construct a compressor class.
+        Returns
+        -------
+        Blosc | None
+            An instantiated Blosc compressor. Return None if not set in configs.
+
+        """
+        if self.job_settings.compressor_name == CompressorName.BLOSC:
+            return Blosc(**self.job_settings.compressor_kwargs)
+        else:
+            return None
+
+    def _write_stacks(self, stacks_to_process: List) -> None:
+        """
+        Write a list of stacks.
+        Parameters
+        ----------
+        stacks_to_process : List
+
+        Returns
+        -------
+        None
+
+        """
+
+        if not len(stacks_to_process):
+            logging.info("No stacks to process!")
+            return
+
+        compressor = self._get_compressor()
+
+        # Acquisition path in root folder
+        acquisition_path = self.job_settings.input_source.joinpath(
+            "acquisition.json"
+        )
+
+        # Getting voxel resolution
+        voxel_size_zyx = self._get_voxel_resolution(
+            acquisition_path=acquisition_path
+        )
+
+        # Converting CZI tiles to Multiscale OMEZarr
+        for stack in stacks_to_process:
+            logging.info(f"Converting {stack}")
+            stack_name = stack.stem
+
+            output_path = Path(self.job_settings.output_directory)
+
+            msg = (
+                f"Voxel resolution ZYX {voxel_size_zyx} for {stack} "
+                f"with name {stack_name} - output: {output_path}"
+            )
+            logging.info(msg)
+
+            czi_stack_zarr_writer(
+                czi_path=str(stack),
+                output_path=output_path,
+                voxel_size=voxel_size_zyx,
+                final_chunksize=self.job_settings.chunk_size,
+                scale_factor=self.job_settings.scale_factor,
+                n_lvls=self.job_settings.downsample_levels,
+                channel_name=stack_name,
+                stack_name=f"{stack_name}.ome.zarr",
+                logger=logging,
+                writing_options=compressor,
+                target_size_mb=self.job_settings.target_size_mb,
+            )
+
+            if self.job_settings.s3_location is not None:
+                channel_zgroup_file = output_path / ".zgroup"
+                s3_channel_zgroup_file = (
+                    f"{self.job_settings.s3_location}/.zgroup"
+                )
+                logging.info(
+                    f"Uploading {channel_zgroup_file} to "
+                    f"{s3_channel_zgroup_file}"
+                )
+                utils.copy_file_to_s3(
+                    channel_zgroup_file, s3_channel_zgroup_file
+                )
+                ome_zarr_stack_name = f"{stack_name}.ome.zarr"
+                ome_zarr_stack_path = output_path.joinpath(ome_zarr_stack_name)
+                s3_stack_dir = (
+                    f"{self.job_settings.s3_location}/"
+                    f"{ome_zarr_stack_name}"
+                )
+                logging.info(
+                    f"Uploading {ome_zarr_stack_path} to {s3_stack_dir}"
+                )
+                utils.sync_dir_to_s3(ome_zarr_stack_path, s3_stack_dir)
+                logging.info(f"Removing: {ome_zarr_stack_path}")
+                # Remove stack if uploaded to s3. We can potentially do all
+                # the stacks in the partition in parallel using dask to speed
+                # this up
+                shutil.rmtree(ome_zarr_stack_path)
+
+    def _upload_derivatives_folder(self):
+        """
+        Uploads the derivatives folder inside of
+        the SPIM folder in the cloud.
+        """
+        s3_derivatives_dir = f"{self.job_settings.s3_location}/derivatives"
+        derivatives_path = Path(self.job_settings.input_source).joinpath(
+            "derivatives"
+        )
+
+        if not derivatives_path.exists():
+            raise FileNotFoundError(f"{derivatives_path} does not exist.")
+
+        if self.job_settings.s3_location is not None:
+            logging.info(
+                f"Uploading {derivatives_path} to {s3_derivatives_dir}"
+            )
+            utils.sync_dir_to_s3(derivatives_path, s3_derivatives_dir)
+            logging.info(f"{derivatives_path} uploaded to s3.")
+
+    def run_job(self):
+        """Main entrypoint to run the job."""
+        job_start_time = time()
+
+        # Reading data within the SPIM folder
+        partitioned_list = self._get_partitioned_list_of_stack_paths()
+
+        # Upload derivatives folder
+        if self.job_settings.partition_to_process == 0:
+            self._upload_derivatives_folder()
+
+        stacks_to_process = partitioned_list[
+            self.job_settings.partition_to_process
+        ]
+
+        self._write_stacks(stacks_to_process=stacks_to_process)
+        total_job_duration = time() - job_start_time
+        return JobResponse(
+            status_code=200, message=f"Job finished in {total_job_duration}"
+        )
+
+
+# TODO: Add this to core aind_data_transformation class
+def job_entrypoint(sys_args: list):
+    """Main function"""
+    parser = get_parser()
+    cli_args = parser.parse_args(sys_args)
+    if cli_args.job_settings is not None:
+        job_settings = ZeissJobSettings.model_validate_json(
+            cli_args.job_settings
+        )
+    elif cli_args.config_file is not None:
+        job_settings = ZeissJobSettings.from_config_file(cli_args.config_file)
+    else:
+        # Construct settings from env vars
+        job_settings = ZeissJobSettings()
+    job = ZeissCompressionJob(job_settings=job_settings)
+    job_response = job.run_job()
+    logging.info(job_response.model_dump_json())
+
+
+if __name__ == "__main__":
+    job_entrypoint(sys.argv[1:])
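
The job fans work out across nodes with the static `partition_list` helper: CZI stack paths found under `SPIM/` are sorted so every node derives the same list, dealt out round-robin, and each worker then processes only the partition selected by `partition_to_process` (partition 0 also uploads the derivatives folder). A minimal standalone sketch of that round-robin behaviour, using made-up stack names:

```python
# Standalone sketch of the round-robin partitioning used by ZeissCompressionJob.
# Stack names and the partition count are placeholders, not values from the package.
from typing import Any, List


def partition_list(lst: List[Any], num_of_partitions: int) -> List[List[Any]]:
    """Distribute items round-robin across num_of_partitions buckets."""
    buckets: List[List[Any]] = [[] for _ in range(num_of_partitions)]
    for index, item in enumerate(lst):
        buckets[index % num_of_partitions].append(item)
    return buckets


stacks = sorted(f"Tile_{i:04d}.czi" for i in range(10))  # placeholder stack names
partitions = partition_list(stacks, num_of_partitions=4)
partition_to_process = 2  # each worker node handles exactly one partition
print(partitions[partition_to_process])  # ['Tile_0002.czi', 'Tile_0006.czi']
```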
--- /dev/null
+++ Rhapso/fusion/multiscale/aind_z1_radial_correction/__init__.py
@@ -0,0 +1,21 @@
+"""Z1 radial correction."""
+
+__version__ = "0.0.4"
+__authors__ = ["Camilo Laiton", "Carson Berry", "Tim Wang"]
+__author_emails__ = [
+    "camilo.laiton@alleninstitute.org",
+    "carson.berry@alleninstitute.org",
+    "tim.wang@alleninstitute.org",
+]
+__license__ = "MIT"
+__description__ = "Image preprocessing for radial correction"
+__url__ = "https://github.com/AllenNeuralDynamics/aind-z1-radial-correction"
+
+__maintainers__ = ["Camilo Laiton", "Carson Berry"]
+__maintainer_emails__ = [
+    "carson.berry@alleninstitute.org",
+    "camilo.laiton@alleninstitute.org",
+]
+__title__ = "aind-z1-radial-correction"
+__status__ = "Development"  # 'Development' 'Production', 'Beta'
+__pipeline_version__ = "0.0.1"
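
This `__init__.py` only exposes package metadata as module-level dunder attributes. A hypothetical way downstream code could read them (the import path follows the file listing above):

```python
# Hypothetical usage sketch: reading the metadata dunders defined above.
from Rhapso.fusion.multiscale import aind_z1_radial_correction as z1rc

print(z1rc.__title__, z1rc.__version__)            # aind-z1-radial-correction 0.0.4
print(z1rc.__status__, z1rc.__pipeline_version__)  # Development 0.0.1
```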
--- /dev/null
+++ Rhapso/fusion/multiscale/aind_z1_radial_correction/array_to_zarr.py
@@ -0,0 +1,257 @@
+"""
+Writes a multiscale zarrv3 dataset from an array
+"""
+
+import logging
+from pathlib import Path
+from typing import Dict, List, Optional
+import dask.array as da
+import numpy as np
+import zarr
+from Rhapso.fusion.multiscale.aind_hcr_data_transformation.compress.czi_to_zarr import (
+    _get_pyramid_metadata,
+    compute_pyramid,
+    write_ome_ngff_metadata,
+)
+from Rhapso.fusion.multiscale.aind_hcr_data_transformation.compress.zarr_writer import (
+    BlockedArrayWriter,
+)
+from Rhapso.fusion.multiscale.aind_hcr_data_transformation.utils.utils import pad_array_n_d
+from numcodecs.blosc import Blosc
+from numpy.typing import ArrayLike
+from ome_zarr.io import parse_url
+from zarr.errors import ContainsGroupError
+from zarr.storage import FSStore
+from .utils.utils import get_parent_path, is_s3_path
+
+
+def safe_create_zarr_group(store, path: str = "", **kwargs):
+    """
+    Safe creation of the zarr group.
+
+    Parameters
+    ----------
+    store
+        Zarr store
+    path: str
+        Path to the creation of the zarr group
+        Default: ''
+
+    Returns
+    -------
+    Zarr.group
+        Zarr group pointing to where the data is written
+    """
+    try:
+        return zarr.group(store=store, path=path, overwrite=False, **kwargs)
+    except ContainsGroupError:
+        # Group already exists, which is expected with multiple workers
+        return zarr.open_group(store=store, path=path, mode="r+")
+
+
+def convert_array_to_zarr(
+    array: ArrayLike,
+    chunk_size: List[int],
+    output_path: str,
+    voxel_size: List[float],
+    n_lvls: Optional[int] = 6,
+    scale_factor: Optional[List[int]] = [2, 2, 2],
+    compressor_kwargs: Optional[Dict] = {
+        "cname": "zstd",
+        "clevel": 3,
+        "shuffle": Blosc.SHUFFLE,
+    },
+    target_size_mb: Optional[int] = 24000,
+):
+    """
+    Converts an array to zarr format
+
+    Parameters
+    ----------
+    array: ArrayLike
+        Array to convert to zarr v3
+
+    chunk_size: List[int]
+        Chunksize in each shard
+
+    output_path: str
+        Output path. It must contain the ome.zarr
+        extension attached.
+
+    voxel_size: List[float]
+        Voxel size
+
+    n_lvls: Optional[int]
+        Number of downsampled levels to write.
+        Default: 6
+
+    scale_factor: Optional[List[int]]
+        Scaling factor per axis. Default: [2, 2, 2]
+
+    compressor_kwargs: Optional[Dict]
+        Compressor parameters
+        Default: {"cname": "zstd", "clevel": 3, "shuffle": "shuffle"}
+    """
+    logger = logging.getLogger(__name__)
+    array = pad_array_n_d(array)
+    dataset_shape = tuple(i for i in array.shape if i != 1)
+    extra_axes = (1,) * (5 - len(dataset_shape))
+    dataset_shape = extra_axes + dataset_shape
+    chunk_size = ([1] * (5 - len(chunk_size))) + chunk_size
+
+    # verify that the chunksize is not larger than the dataset shape
+    for i, val in enumerate(dataset_shape):
+        if chunk_size[i] > val:
+            chunk_size[i] = val
+
+    compressor = Blosc(
+        cname=compressor_kwargs["cname"],
+        clevel=compressor_kwargs["clevel"],
+        shuffle=compressor_kwargs["shuffle"],
+        blocksize=0,
+    )
+
+    # Getting channel color
+    channel_colors = None
+    stack_name = Path(output_path).name
+    parent_path = get_parent_path(output_path)
+    # Creating Zarr dataset in s3 or local
+    if is_s3_path(output_path):
+        store = FSStore(parent_path, mode="w", dimension_separator="/")
+    else:
+        store = parse_url(path=parent_path, mode="w").store
+
+    root_group = safe_create_zarr_group(store=store)
+
+    # Using 1 thread since is in single machine.
+    # Avoiding the use of multithreaded due to GIL
+    if np.issubdtype(array.dtype, np.integer):
+        np_info_func = np.iinfo
+
+    else:
+        # Floating point
+        np_info_func = np.finfo
+
+    # Getting min max metadata for the dtype
+    channel_minmax = [
+        (
+            # int(np_info_func(array.dtype).min),
+            int(0.0),
+            # int(np_info_func(array.dtype).max),
+            int(1.0),
+        )
+        for _ in range(dataset_shape[1])
+    ]
+
+    # Setting values for CZI
+    # Ideally we would use da.percentile(image_data, (0.1, 95))
+    # However, it would take so much time and resources and it is
+    # not used that much on neuroglancer
+    channel_startend = [(int(0), int(1.0)) for _ in range(dataset_shape[1])]
+
+    # Writing OME-NGFF metadata
+    scale_factor = [int(s) for s in scale_factor]
+    voxel_size = [float(v) for v in voxel_size]
+
+    new_channel_group = root_group.create_group(
+        name=stack_name, overwrite=True
+    )
+
+    # Writing OME-NGFF metadata
+    write_ome_ngff_metadata(
+        group=new_channel_group,
+        arr_shape=dataset_shape,
+        image_name=stack_name,
+        n_lvls=n_lvls,
+        scale_factors=scale_factor,
+        voxel_size=voxel_size,
+        channel_names=None,
+        channel_colors=channel_colors,
+        channel_minmax=channel_minmax,
+        channel_startend=channel_startend,
+        metadata=_get_pyramid_metadata(),
+        final_chunksize=chunk_size,
+        origin=[0, 0, 0],
+    )
+
+    # Writing first multiscale by default
+    pyramid_group = new_channel_group.create_dataset(
+        name="0",
+        shape=dataset_shape,
+        chunks=chunk_size,
+        dtype=array.dtype,
+        compressor=compressor,
+        dimension_separator="/",
+        overwrite=True,
+    )
+
+    # Writing multiscales
+    # Handle both numpy arrays and dask arrays
+    if isinstance(array, da.Array):
+        # Already a dask array, rechunk if needed
+        previous_scale = da.rechunk(array, chunks=pyramid_group.chunks)
+    else:
+        # Convert numpy array to dask array
+        previous_scale = da.from_array(array, pyramid_group.chunks)
+
+    block_shape = list(
+        BlockedArrayWriter.get_block_shape(
+            arr=previous_scale,
+            target_size_mb=target_size_mb,
+            chunks=chunk_size,
+        )
+    )
+    block_shape = extra_axes + tuple(block_shape)
+
+    logger.info(f"Writing {n_lvls} pyramid levels...")
+
+    for level in range(0, n_lvls):
+        if not level:
+            array_to_write = previous_scale
+            logger.info(f"Level {level}/{n_lvls-1}: Writing full resolution - shape {array_to_write.shape}")
+
+        else:
+            previous_scale = da.from_zarr(pyramid_group, pyramid_group.chunks)
+            new_scale_factor = (
+                [1] * (len(previous_scale.shape) - len(scale_factor))
+            ) + scale_factor
+
+            logger.info(f"Level {level}/{n_lvls-1}: Computing downsampled pyramid (scale factor: {scale_factor})...")
+            previous_scale_pyramid, _ = compute_pyramid(
+                data=previous_scale,
+                scale_axis=new_scale_factor,
+                chunks=chunk_size,
+                n_lvls=2,
+            )
+            array_to_write = previous_scale_pyramid[-1]
+
+            logger.info(f"Level {level}/{n_lvls-1}: Downsampled to shape {array_to_write.shape}")
+
+        pyramid_group = new_channel_group.create_dataset(
+            name=str(level),
+            shape=array_to_write.shape,
+            chunks=chunk_size,
+            dtype=array_to_write.dtype,
+            compressor=compressor,
+            dimension_separator="/",
+            overwrite=True,
+        )
+
+        logger.info(f"Level {level}/{n_lvls-1}: Writing to storage...")
+        BlockedArrayWriter.store(array_to_write, pyramid_group, block_shape)
+        logger.info(f"Level {level}/{n_lvls-1}: ✓ Complete ({level+1}/{n_lvls} levels done)")
+
+if __name__ == "__main__":
+    BASE_PATH = "/data"
+    tilename = "Tile_X_0000_Y_0011_Z_0000_ch_488.ome.zarr"
+    test_dataset = f"HCR_785830_2025-03-19_17-00-00/SPIM/{tilename}"
+    scale = "0"
+
+    dataset = da.from_zarr(f"{BASE_PATH}/{test_dataset}/{scale}").compute()
+    convert_array_to_zarr(
+        array=dataset,
+        voxel_size=[1.0] * 3,
+        shard_size=[512] * 3,
+        chunk_size=[128] * 3,
+        output_path="/results/test.ome.zarr",
+    )
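
`convert_array_to_zarr` pads the input to 5D (t, c, z, y, x), writes level 0 at full resolution, then for each coarser level reads the previously written level back with `da.from_zarr`, downsamples it by `scale_factor` via `compute_pyramid`, and streams blocks to storage through `BlockedArrayWriter`. A usage sketch that matches the signature shown above, assuming the Rhapso multiscale dependencies are installed; the array contents, sizes, and output path are placeholders, not values from the package:

```python
# Hedged usage sketch for convert_array_to_zarr (placeholder data and paths).
import numpy as np
from Rhapso.fusion.multiscale.aind_z1_radial_correction.array_to_zarr import (
    convert_array_to_zarr,
)

# Example ZYX volume; the function pads it internally to 5D (t, c, z, y, x).
volume = np.random.randint(0, 2**16, size=(64, 256, 256), dtype=np.uint16)

convert_array_to_zarr(
    array=volume,
    chunk_size=[64, 128, 128],          # chunk shape used for every written level
    output_path="/results/example.ome.zarr",
    voxel_size=[2.0, 0.5, 0.5],         # per-axis voxel size (example values)
    n_lvls=3,                           # number of pyramid levels to write
    scale_factor=[2, 2, 2],             # per-axis downsampling between levels
)
```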