Rhapso 0.1.92__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Rhapso/__init__.py +1 -0
- Rhapso/data_prep/__init__.py +2 -0
- Rhapso/data_prep/n5_reader.py +188 -0
- Rhapso/data_prep/s3_big_stitcher_reader.py +55 -0
- Rhapso/data_prep/xml_to_dataframe.py +215 -0
- Rhapso/detection/__init__.py +5 -0
- Rhapso/detection/advanced_refinement.py +203 -0
- Rhapso/detection/difference_of_gaussian.py +324 -0
- Rhapso/detection/image_reader.py +117 -0
- Rhapso/detection/metadata_builder.py +130 -0
- Rhapso/detection/overlap_detection.py +327 -0
- Rhapso/detection/points_validation.py +49 -0
- Rhapso/detection/save_interest_points.py +265 -0
- Rhapso/detection/view_transform_models.py +67 -0
- Rhapso/fusion/__init__.py +0 -0
- Rhapso/fusion/affine_fusion/__init__.py +2 -0
- Rhapso/fusion/affine_fusion/blend.py +289 -0
- Rhapso/fusion/affine_fusion/fusion.py +601 -0
- Rhapso/fusion/affine_fusion/geometry.py +159 -0
- Rhapso/fusion/affine_fusion/io.py +546 -0
- Rhapso/fusion/affine_fusion/script_utils.py +111 -0
- Rhapso/fusion/affine_fusion/setup.py +4 -0
- Rhapso/fusion/affine_fusion_worker.py +234 -0
- Rhapso/fusion/multiscale/__init__.py +0 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/__init__.py +19 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/__init__.py +3 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/czi_to_zarr.py +698 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/zarr_writer.py +265 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/models.py +81 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/utils/__init__.py +3 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/utils/utils.py +526 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/zeiss_job.py +249 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/__init__.py +21 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/array_to_zarr.py +257 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/radial_correction.py +557 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/run_capsule.py +98 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/utils/__init__.py +3 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/utils/utils.py +266 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/worker.py +89 -0
- Rhapso/fusion/multiscale_worker.py +113 -0
- Rhapso/fusion/neuroglancer_link_gen/__init__.py +8 -0
- Rhapso/fusion/neuroglancer_link_gen/dispim_link.py +235 -0
- Rhapso/fusion/neuroglancer_link_gen/exaspim_link.py +127 -0
- Rhapso/fusion/neuroglancer_link_gen/hcr_link.py +368 -0
- Rhapso/fusion/neuroglancer_link_gen/iSPIM_top.py +47 -0
- Rhapso/fusion/neuroglancer_link_gen/link_utils.py +239 -0
- Rhapso/fusion/neuroglancer_link_gen/main.py +299 -0
- Rhapso/fusion/neuroglancer_link_gen/ng_layer.py +1434 -0
- Rhapso/fusion/neuroglancer_link_gen/ng_state.py +1123 -0
- Rhapso/fusion/neuroglancer_link_gen/parsers.py +336 -0
- Rhapso/fusion/neuroglancer_link_gen/raw_link.py +116 -0
- Rhapso/fusion/neuroglancer_link_gen/utils/__init__.py +4 -0
- Rhapso/fusion/neuroglancer_link_gen/utils/shader_utils.py +85 -0
- Rhapso/fusion/neuroglancer_link_gen/utils/transfer.py +43 -0
- Rhapso/fusion/neuroglancer_link_gen/utils/utils.py +303 -0
- Rhapso/fusion/neuroglancer_link_gen_worker.py +30 -0
- Rhapso/matching/__init__.py +0 -0
- Rhapso/matching/load_and_transform_points.py +458 -0
- Rhapso/matching/ransac_matching.py +544 -0
- Rhapso/matching/save_matches.py +120 -0
- Rhapso/matching/xml_parser.py +302 -0
- Rhapso/pipelines/__init__.py +0 -0
- Rhapso/pipelines/ray/__init__.py +0 -0
- Rhapso/pipelines/ray/aws/__init__.py +0 -0
- Rhapso/pipelines/ray/aws/alignment_pipeline.py +227 -0
- Rhapso/pipelines/ray/aws/config/__init__.py +0 -0
- Rhapso/pipelines/ray/evaluation.py +71 -0
- Rhapso/pipelines/ray/interest_point_detection.py +137 -0
- Rhapso/pipelines/ray/interest_point_matching.py +110 -0
- Rhapso/pipelines/ray/local/__init__.py +0 -0
- Rhapso/pipelines/ray/local/alignment_pipeline.py +167 -0
- Rhapso/pipelines/ray/matching_stats.py +104 -0
- Rhapso/pipelines/ray/param/__init__.py +0 -0
- Rhapso/pipelines/ray/solver.py +120 -0
- Rhapso/pipelines/ray/split_dataset.py +78 -0
- Rhapso/solver/__init__.py +0 -0
- Rhapso/solver/compute_tiles.py +562 -0
- Rhapso/solver/concatenate_models.py +116 -0
- Rhapso/solver/connected_graphs.py +111 -0
- Rhapso/solver/data_prep.py +181 -0
- Rhapso/solver/global_optimization.py +410 -0
- Rhapso/solver/model_and_tile_setup.py +109 -0
- Rhapso/solver/pre_align_tiles.py +323 -0
- Rhapso/solver/save_results.py +97 -0
- Rhapso/solver/view_transforms.py +75 -0
- Rhapso/solver/xml_to_dataframe_solver.py +213 -0
- Rhapso/split_dataset/__init__.py +0 -0
- Rhapso/split_dataset/compute_grid_rules.py +78 -0
- Rhapso/split_dataset/save_points.py +101 -0
- Rhapso/split_dataset/save_xml.py +377 -0
- Rhapso/split_dataset/split_images.py +537 -0
- Rhapso/split_dataset/xml_to_dataframe_split.py +219 -0
- rhapso-0.1.92.dist-info/METADATA +39 -0
- rhapso-0.1.92.dist-info/RECORD +101 -0
- rhapso-0.1.92.dist-info/WHEEL +5 -0
- rhapso-0.1.92.dist-info/licenses/LICENSE +21 -0
- rhapso-0.1.92.dist-info/top_level.txt +2 -0
- tests/__init__.py +1 -0
- tests/test_detection.py +17 -0
- tests/test_matching.py +21 -0
- tests/test_solving.py +21 -0
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for the radial correction step
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import List
|
|
9
|
+
from urllib.parse import urlparse
|
|
10
|
+
|
|
11
|
+
# Commented out - only needed for generate_processing function which is not used by worker
|
|
12
|
+
# from aind_data_schema.core.processing import (
|
|
13
|
+
# DataProcess,
|
|
14
|
+
# PipelineProcess,
|
|
15
|
+
# Processing,
|
|
16
|
+
# )
|
|
17
|
+
# from packaging import version
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# def generate_processing(
|
|
21
|
+
# data_processes: List[DataProcess],
|
|
22
|
+
# dest_processing: str,
|
|
23
|
+
# prefix: str,
|
|
24
|
+
# processor_full_name: str,
|
|
25
|
+
# pipeline_version: str,
|
|
26
|
+
# ):
|
|
27
|
+
# """
|
|
28
|
+
# Generates data description for the output folder.
|
|
29
|
+
#
|
|
30
|
+
# Parameters
|
|
31
|
+
# ------------------------
|
|
32
|
+
#
|
|
33
|
+
# data_processes: List[dict]
|
|
34
|
+
# List with the processes applied in the pipeline.
|
|
35
|
+
#
|
|
36
|
+
# dest_processing: PathLike
|
|
37
|
+
# Path where the processing file will be placed.
|
|
38
|
+
#
|
|
39
|
+
# processor_full_name: str
|
|
40
|
+
# Person in charge of running the pipeline
|
|
41
|
+
# for this data asset
|
|
42
|
+
#
|
|
43
|
+
# pipeline_version: str
|
|
44
|
+
# Terastitcher pipeline version
|
|
45
|
+
#
|
|
46
|
+
# """
|
|
47
|
+
# # flake8: noqa: E501
|
|
48
|
+
# processing_pipeline = PipelineProcess(
|
|
49
|
+
# data_processes=data_processes,
|
|
50
|
+
# processor_full_name=processor_full_name,
|
|
51
|
+
# pipeline_version=pipeline_version,
|
|
52
|
+
# pipeline_url="",
|
|
53
|
+
# note="Metadata for radial correction",
|
|
54
|
+
# )
|
|
55
|
+
#
|
|
56
|
+
# processing = Processing(
|
|
57
|
+
# processing_pipeline=processing_pipeline,
|
|
58
|
+
# notes="This processing only contains metadata about radial correction \
|
|
59
|
+
# and needs to be compiled with other steps at the end",
|
|
60
|
+
# )
|
|
61
|
+
#
|
|
62
|
+
# processing.write_standard_file(
|
|
63
|
+
# output_directory=dest_processing, prefix=prefix
|
|
64
|
+
# )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def read_json_as_dict(filepath: str) -> dict:
    """
    Read a JSON file into a dictionary.

    Parameters
    ------------------------

    filepath: PathLike
        Path where the json is located.

    Returns
    ------------------------

    dict:
        Parsed JSON content, or an empty dict when
        the file does not exist.

    """
    # Missing file is not an error here: callers treat an
    # absent config as "no data".
    if not os.path.exists(filepath):
        return {}

    with open(filepath) as json_file:
        return json.load(json_file)
92
|
+
|
|
93
|
+
|
|
94
|
+
def get_voxel_resolution(acquisition_path: Path) -> List[float]:
    """
    Get the voxel resolution from an acquisition.json file.

    Dispatches to the v1 or v2 parser depending on the file's
    ``schema_version`` field (aind-data-schema >= 2.0.0 uses the
    v2 layout).

    Parameters
    ----------
    acquisition_path: Path
        Path to the acquisition.json file.

    Returns
    -------
    List[float]
        Voxel resolution in the format [z, y, x].

    Raises
    ------
    FileNotFoundError
        If the acquisition.json file does not exist.
    ValueError
        If ``schema_version`` is missing or malformed.
    """
    if not Path(acquisition_path).is_file():
        raise FileNotFoundError(
            f"acquisition.json file not found at: {acquisition_path}"
        )

    acquisition_config = read_json_as_dict(str(acquisition_path))

    schema_version = acquisition_config.get("schema_version")
    print(f"Schema version: {schema_version}")

    # BUGFIX: the original code called version.parse() but the
    # `from packaging import version` import was commented out at the
    # top of this module, so this line always raised NameError.
    # Comparing the leading numeric components with stdlib parsing is
    # sufficient to distinguish v1 (< 2.0.0) from v2 (>= 2.0.0).
    if _parse_schema_version(schema_version) >= (2, 0, 0):
        return _get_voxel_resolution_v2(acquisition_config)
    else:
        return _get_voxel_resolution_v1(acquisition_config)


def _parse_schema_version(version_str) -> tuple:
    """Parse a dotted version string into a 3-tuple of ints, e.g. '2.0.1' -> (2, 0, 1)."""
    if not version_str:
        raise ValueError(f"Invalid schema_version: {version_str!r}")
    try:
        parts = tuple(int(p) for p in str(version_str).split(".")[:3])
    except ValueError as e:
        raise ValueError(f"Invalid schema_version: {version_str!r}") from e
    # Pad short versions ("2" or "2.0") so tuple comparison is sound.
    return parts + (0,) * (3 - len(parts))
+
|
|
122
|
+
|
|
123
|
+
def _get_voxel_resolution_v1(acquisition_config: dict) -> List[float]:
|
|
124
|
+
"""
|
|
125
|
+
Get the voxel resolution from an acquisition.json file.
|
|
126
|
+
|
|
127
|
+
Parameters
|
|
128
|
+
----------
|
|
129
|
+
acquisition_config: Dict
|
|
130
|
+
Dictionary with the acquisition.json data.
|
|
131
|
+
Returns
|
|
132
|
+
-------
|
|
133
|
+
List[float]
|
|
134
|
+
Voxel resolution in the format [z, y, x].
|
|
135
|
+
"""
|
|
136
|
+
|
|
137
|
+
if not acquisition_config:
|
|
138
|
+
raise ValueError("acquisition.json file is empty or invalid.")
|
|
139
|
+
|
|
140
|
+
# Grabbing a tile with metadata from acquisition - we assume all
|
|
141
|
+
# dataset was acquired with the same resolution
|
|
142
|
+
tile_coord_transforms = acquisition_config["tiles"][0][
|
|
143
|
+
"coordinate_transformations"
|
|
144
|
+
]
|
|
145
|
+
|
|
146
|
+
scale_transform = [
|
|
147
|
+
x["scale"] for x in tile_coord_transforms if x["type"] == "scale"
|
|
148
|
+
][0]
|
|
149
|
+
|
|
150
|
+
x = float(scale_transform[0])
|
|
151
|
+
y = float(scale_transform[1])
|
|
152
|
+
z = float(scale_transform[2])
|
|
153
|
+
|
|
154
|
+
return [z, y, x]
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _get_voxel_resolution_v2(acquisition_config: dict) -> List[float]:
|
|
158
|
+
"""
|
|
159
|
+
Get the voxel resolution from an acquisition.json in
|
|
160
|
+
aind-data-schema v2 format.
|
|
161
|
+
|
|
162
|
+
Parameters
|
|
163
|
+
----------
|
|
164
|
+
acquisition_config: Dict
|
|
165
|
+
Dictionary with the acquisition.json data.
|
|
166
|
+
|
|
167
|
+
Returns
|
|
168
|
+
-------
|
|
169
|
+
List[float]
|
|
170
|
+
Voxel resolution in the format [z, y, x].
|
|
171
|
+
"""
|
|
172
|
+
try:
|
|
173
|
+
data_stream = acquisition_config.get("data_streams", [])[0]
|
|
174
|
+
configuration = data_stream.get("configurations", [])[0]
|
|
175
|
+
image = configuration.get("images", [])[0]
|
|
176
|
+
image_to_acquisition_transform = image[
|
|
177
|
+
"image_to_acquisition_transform"
|
|
178
|
+
]
|
|
179
|
+
except (IndexError, AttributeError, KeyError) as e:
|
|
180
|
+
raise ValueError(
|
|
181
|
+
"acquisition_config structure is invalid or missing "
|
|
182
|
+
"required fields"
|
|
183
|
+
) from e
|
|
184
|
+
|
|
185
|
+
scale_transform = [
|
|
186
|
+
x["scale"]
|
|
187
|
+
for x in image_to_acquisition_transform
|
|
188
|
+
if x["object_type"] == "Scale"
|
|
189
|
+
][0]
|
|
190
|
+
|
|
191
|
+
x = float(scale_transform[0])
|
|
192
|
+
y = float(scale_transform[1])
|
|
193
|
+
z = float(scale_transform[2])
|
|
194
|
+
|
|
195
|
+
return [z, y, x]
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def validate_capsule_inputs(input_elements: List[str]) -> List[str]:
    """
    Validate input elements for a capsule in Code Ocean.

    Parameters
    -----------
    input_elements: List[str]
        Input elements for the capsule. These could be
        sets of files or folders.

    Returns
    -----------
    List[str]
        List of missing files
    """
    # An element is "missing" when nothing exists at that path.
    return [
        str(Path(element))
        for element in input_elements
        if not Path(element).exists()
    ]
+
|
|
224
|
+
|
|
225
|
+
def is_s3_path(path: str) -> bool:
    """
    Check whether a path is an S3 URI.

    Parameters
    ----------
    path: str
        Provided path

    Returns
    -------
    bool
        True if it is a S3 path,
        False if not.
    """
    # urlparse yields an empty scheme for plain filesystem paths.
    return urlparse(str(path)).scheme == "s3"
+
|
|
243
|
+
|
|
244
|
+
def get_parent_path(path: str) -> str:
    """
    Get the parent of a local or S3 path.

    Parameters
    ----------
    path: str
        Provided path

    Returns
    -------
    str
        Parent path
    """
    parsed = urlparse(path)

    # Local path fallback: delegate to pathlib.
    if parsed.scheme != "s3":
        return str(Path(path).parent)

    # For S3, drop the final component of the object key.
    key_parts = parsed.path.strip("/").split("/")
    parent_key = "/".join(key_parts[:-1])
    return f"s3://{parsed.netloc}/{parent_key}"
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Worker script to run multiscale conversion on a zarr dataset
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
import dask.array as da
|
|
9
|
+
|
|
10
|
+
from Rhapso.fusion.multiscale.aind_z1_radial_correction.array_to_zarr import convert_array_to_zarr
|
|
11
|
+
from Rhapso.fusion.multiscale.aind_z1_radial_correction.utils import utils
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def run(
    input_zarr_path: str = "s3://martin-test-bucket/output/channel_488.zarr",
    output_zarr_path: str = "s3://martin-test-bucket/output/multiscale_channel_488.zarr",
):
    """
    Run multiscale conversion on a zarr dataset.

    Loads the dataset (OME-Zarr scale "0" first, then the root group),
    materializes it, and writes a multiscale pyramid via
    ``convert_array_to_zarr``.

    Parameters
    ----------
    input_zarr_path: str
        Source zarr dataset. Defaults preserved from the original
        script for backward compatibility.
    output_zarr_path: str
        Destination for the multiscale zarr pyramid.

    Raises
    ------
    ValueError
        If the dataset cannot be loaded from either the root group or
        the "0" scale group.
    """
    print(f"Loading data from: {input_zarr_path}")

    # Load the zarr dataset. OME-Zarr stores full resolution under
    # scale "0", so try that first and fall back to the root group.
    try:
        dataset = da.from_zarr(f"{input_zarr_path}/0")
        print(f"Loaded data from {input_zarr_path}/0")
    except Exception as e:
        print(f"Could not load from scale 0: {e}")
        try:
            dataset = da.from_zarr(input_zarr_path)
            print(f"Loaded data from {input_zarr_path}")
        except Exception as e2:
            raise ValueError(
                f"Could not load data from {input_zarr_path} or {input_zarr_path}/0. Error: {e2}"
            ) from e2

    print(f"Dataset shape: {dataset.shape}")
    print(f"Dataset dtype: {dataset.dtype}")
    print(f"Dataset chunks: {dataset.chunks}")

    # Compute the array (load into memory).
    # WARNING: this materializes the whole dataset; for very large
    # inputs consider passing the lazy dask array through instead.
    print("Computing array...")
    array = dataset.compute()
    print("Array computed successfully")

    # Parameters for the multiscale pyramid; adjust to the data.
    chunk_size = [128, 128, 128]  # Chunk size for the output zarr
    voxel_size = [1.0, 1.0, 1.0]  # Voxel size in micrometers (adjust if known)
    n_lvls = 6  # Number of pyramid levels
    scale_factor = [2, 2, 2]  # Downsampling factor per level

    compressor_kwargs = {
        "cname": "zstd",
        "clevel": 3,
        "shuffle": 2,  # Blosc.SHUFFLE
    }

    print(f"Converting to multiscale format and writing to: {output_zarr_path}")
    print(f"Chunk size: {chunk_size}")
    print(f"Voxel size: {voxel_size}")
    print(f"Number of levels: {n_lvls}")
    print(f"Scale factor: {scale_factor}")

    # Convert to multiscale zarr
    convert_array_to_zarr(
        array=array,
        chunk_size=chunk_size,
        output_path=output_zarr_path,
        voxel_size=voxel_size,
        n_lvls=n_lvls,
        scale_factor=scale_factor,
        compressor_kwargs=compressor_kwargs,
        target_size_mb=24000,
    )

    # Fixed: the original used an f-string with no placeholders here.
    print("Multiscale conversion completed successfully!")
    print(f"Output written to: {output_zarr_path}")


if __name__ == "__main__":
    run()
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Worker script to run multiscale conversion on a zarr dataset
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
import dask.array as da
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
from Rhapso.fusion.multiscale.aind_z1_radial_correction.array_to_zarr import convert_array_to_zarr
|
|
12
|
+
|
|
13
|
+
# Setup logging
# Configure the root logger once at import time so worker output is
# timestamped and leveled. NOTE(review): calling basicConfig at import
# time can override a host application's logging configuration —
# confirm this module is only ever run as a standalone script.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger used throughout run(), named after this module
# per the standard logging convention.
logger = logging.getLogger(__name__)
|
+
|
|
21
|
+
def run(
    input_zarr_path: str = "s3://martin-test-bucket/output7/channel_488.zarr",
    output_zarr_path: str = "s3://martin-test-bucket/output7/multiscale_channel_488.zarr",
):
    """
    Run multiscale conversion on a zarr dataset using lazy dask chunks.

    Unlike the eager variant, the dataset is never fully materialized:
    the lazy dask array is handed directly to ``convert_array_to_zarr``
    for chunk-by-chunk processing.

    Parameters
    ----------
    input_zarr_path: str
        Source zarr dataset. The OME-Zarr scale "0" group is tried
        first, then the root group. Defaults preserved from the
        original script for backward compatibility.
    output_zarr_path: str
        Destination for the multiscale zarr pyramid.

    Raises
    ------
    ValueError
        If the dataset cannot be loaded from either location.
    """
    # Hoisted from mid-function in the original; only used for size
    # reporting below.
    import numpy as np

    # Parameters for the multiscale pyramid; adjust to the data.
    chunk_size = [128, 128, 128]  # Chunk size for the output zarr
    voxel_size = [1.0, 1.0, 1.0]  # Voxel size in micrometers (adjust if known)
    n_lvls = 6  # Number of pyramid levels
    scale_factor = [2, 2, 2]  # Downsampling factor per level

    # Lazy %-style args (and no placeholder-free f-strings) per
    # logging best practice.
    logger.info("Starting multiscale conversion")
    logger.info("Input: %s", input_zarr_path)
    logger.info("Output: %s", output_zarr_path)

    # Load the zarr dataset. OME-Zarr stores full resolution under
    # scale "0", so try that first and fall back to the root group.
    try:
        logger.info("Attempting to load from %s/0...", input_zarr_path)
        sys.stdout.flush()
        dataset = da.from_zarr(f"{input_zarr_path}/0")
        logger.info("Successfully loaded data from %s/0", input_zarr_path)
    except Exception as e:
        logger.warning("Could not load from scale 0: %s", e)
        try:
            logger.info("Attempting to load from %s...", input_zarr_path)
            sys.stdout.flush()
            dataset = da.from_zarr(input_zarr_path)
            logger.info("Successfully loaded data from %s", input_zarr_path)
        except Exception as e2:
            logger.error("Failed to load data: %s", e2)
            raise ValueError(
                f"Could not load data from {input_zarr_path} or {input_zarr_path}/0. Error: {e2}"
            ) from e2

    logger.info("Dataset shape: %s", dataset.shape)
    logger.info("Dataset dtype: %s", dataset.dtype)
    logger.info("Dataset chunks: %s", dataset.chunks)

    # Report uncompressed dataset size for operator visibility.
    dtype_bytes = np.dtype(dataset.dtype).itemsize
    total_size_gb = np.prod(dataset.shape) * dtype_bytes / (1024**3)
    logger.info("Dataset size: %.2f GB", total_size_gb)

    # Keep the dask array lazy so the dataset is never fully loaded
    # into memory; conversion consumes it chunk by chunk.
    logger.info("Using Dask array for lazy/chunked processing (not loading into memory)")
    array = dataset

    compressor_kwargs = {
        "cname": "zstd",
        "clevel": 3,
        "shuffle": 2,  # Blosc.SHUFFLE
    }

    logger.info("=" * 60)
    logger.info("Starting multiscale conversion with parameters:")
    logger.info("  Output path: %s", output_zarr_path)
    logger.info("  Chunk size: %s", chunk_size)
    logger.info("  Voxel size: %s", voxel_size)
    logger.info("  Number of levels: %s", n_lvls)
    logger.info("  Scale factor: %s", scale_factor)
    logger.info("=" * 60)
    sys.stdout.flush()

    # Convert to multiscale zarr
    convert_array_to_zarr(
        array=array,
        chunk_size=chunk_size,
        output_path=output_zarr_path,
        voxel_size=voxel_size,
        n_lvls=n_lvls,
        scale_factor=scale_factor,
        compressor_kwargs=compressor_kwargs,
        target_size_mb=24000,
    )

    logger.info("=" * 60)
    logger.info("MULTISCALE CONVERSION COMPLETED SUCCESSFULLY!")
    logger.info("Output written to: %s", output_zarr_path)
    logger.info("=" * 60)


if __name__ == "__main__":
    run()
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Library for generating dispim link.
|
|
3
|
+
"""
|
|
4
|
+
import pathlib
|
|
5
|
+
|
|
6
|
+
from . import link_utils
|
|
7
|
+
import numpy as np
|
|
8
|
+
from ng_state import NgState
|
|
9
|
+
from parsers import XmlParser
|
|
10
|
+
from utils import transfer
|
|
11
|
+
from typing import List, Dict, Tuple
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def apply_deskewing(matrix_3x4: np.ndarray, theta: float = 45) -> np.ndarray:
    """
    Left-compose a deskewing (shear) transform with the input matrix:
    result = Deskewing @ matrix_3x4.

    Parameters
    ------------------------
    matrix_3x4: np.ndarray
        3x4 numpy array representing transformation matrix applied to tile.

    theta: float
        Angle of lens during acquisition, in degrees.

    Returns
    ------------------------
    np.ndarray:
        3x4 numpy array composite transform.

    """
    # Shear along X proportional to Z (X vector => XZ direction).
    shear = np.tan(np.deg2rad(theta))
    deskew_matrix = np.array(
        [
            [1, 0, 0],
            [0, 1, 0],
            [shear, 0, 1],
        ]
    )
    return deskew_matrix @ matrix_3x4
+
|
|
42
|
+
|
|
43
|
+
def generate_dispim_link(
    base_channel_xml_path: str,
    # cross_channel_xml_path: str,
    s3_path: str,
    max_dr: int = 800,
    opacity: float = 0.5,
    blend: str = "additive",
    deskew_angle: int = 45,
    output_json_path: str = ".",
    spim_foldername="SPIM.ome.zarr",
) -> str:
    """
    Create a neuroglancer link to visualize registration transforms
    on a diSPIM dataset pre-fusion.

    Parameters
    ------------------------
    base_channel_xml_path: str
        Path to xml file acquired from tile-to-tile
        registration within the base channel.
        These registrations are reused for
        registering tiles in all other channels.

    s3_path: str
        Path of the s3 bucket where the diSPIM dataset is located.

    max_dr: int
        Upper bound of the shader's normalized display range.

    opacity: float
        Opacity applied to every channel layer.

    blend: str
        Blend mode for every channel layer.

    deskew_angle: int
        Lens angle for deskewing. Currently unused: the
        apply_deskewing call below is commented out.

    output_json_path: str
        Local path to write the process_output.json file that
        neuroglancer reads.

    spim_foldername: str
        Name of the zarr folder under s3_path that holds the tiles.

    Returns
    ------------------------
    str
        The generated neuroglancer URL (despite the original
        annotation this function returns a string, not None).
    """

    # Gather base channel xml info
    vox_sizes: Tuple[float, float, float] = XmlParser.extract_tile_vox_size(
        base_channel_xml_path
    )
    tile_paths: Dict[int, str] = XmlParser.extract_tile_paths(
        base_channel_xml_path
    )
    tile_transforms: Dict[int, List[Dict]] = XmlParser.extract_tile_transforms(
        base_channel_xml_path
    )
    intertile_transforms: Dict[
        int, np.ndarray
    ] = link_utils.calculate_net_transforms(tile_transforms)
    # Channel is inferred from the first tile's path; assumes tile 0
    # exists and encodes its channel in its filename.
    base_channel: int = link_utils.extract_channel_from_tile_path(
        tile_paths[0]
    )

    channels: List[int] = link_utils.get_unique_channels_for_dataset(
        s3_path + spim_foldername
    )

    # Generate input config
    layers = []  # Represent Neuroglancer Tabs
    input_config = {
        "dimensions": {
            "x": {"voxel_size": vox_sizes[0], "unit": "microns"},
            "y": {"voxel_size": vox_sizes[1], "unit": "microns"},
            # reverse the order from bigstitcher again
            "z": {"voxel_size": vox_sizes[2], "unit": "microns"},
            "c'": {"voxel_size": 1, "unit": ""},
            "t": {"voxel_size": 0.001, "unit": "seconds"},
        },
        "layers": layers,
        "showScaleBar": False,
        "showAxisLines": False,
    }

    for channel in channels:
        # Determine color of this layer from its emission wavelength
        hex_val: int = link_utils.wavelength_to_hex(channel)
        hex_str = f"#{str(hex(hex_val))[2:]}"

        # Init new list of sources for each channel
        sources = []  # Represent Tiles w/in Tabs
        layers.append(
            {
                "type": "image",  # Optional
                "source": sources,
                "channel": 0,  # Optional
                "shaderControls": {
                    "normalized": {"range": [90, max_dr]}
                },  # Optional
                "shader": {"color": hex_str, "emitter": "RGB", "vec": "vec3",},
                "visible": True,  # Optional
                "opacity": opacity,
                "name": f"CH_{channel}",
                "blend": blend,
            }
        )

        for tile_id in range(len(intertile_transforms)):
            # Get base tile path, modify path across channels.
            # NOTE(review): a plain substring replace of the channel
            # number could also match unrelated digits in the path —
            # confirm tile filenames only contain the channel once.
            base_t_path = tile_paths[tile_id]
            t_path = base_t_path.replace(f"{base_channel}", f"{channel}")

            # Get net transform
            intertile_tf = intertile_transforms[tile_id]
            i_matrix_3x3 = intertile_tf[:, 0:3]
            i_translation = intertile_tf[:, 3]

            net_matrix_3x3 = i_matrix_3x3  # NOTE: Right-multiply
            net_translation = i_translation
            net_tf = np.hstack((net_matrix_3x3, net_translation.reshape(3, 1)))

            # Deskewing deliberately disabled; kept for reference.
            # net_tf = apply_deskewing(net_tf, deskew_angle)

            # Add (path, transform) source entry
            if s3_path.endswith("/"):
                url = f"{s3_path}{spim_foldername}/{t_path}"
            else:
                url = f"{s3_path}/{spim_foldername}/{t_path}"

            final_transform = link_utils.convert_matrix_3x4_to_5x6(net_tf)
            sources.append(
                {"url": url, "transform_matrix": final_transform.tolist()}
            )

    # NOTE(review): `prefix` is computed but never used after this
    # point — dead code candidate.
    bucket_name, prefix = s3_path.replace("s3://", "").split("/", 1)
    prefix = prefix[:-1]  # remove trailing '/'
    # Generate the link
    neuroglancer_link = NgState(
        input_config=input_config,
        mount_service="s3",
        bucket_path=f"{bucket_name}",
        output_dir=output_json_path,
        base_url="https://aind-neuroglancer-sauujisjxq-uw.a.run.app/",
    )
    neuroglancer_link.save_state_as_json()
    print(neuroglancer_link.get_url_link())
    return neuroglancer_link.get_url_link()
+
|
|
182
|
+
|
|
183
|
+
def ingest_xml_and_write_ng_link(
    xml_path: str, s3_bucket: str = "aind-open-data"
) -> str:
    """A wrapper function that autogenerates the s3_path
    for dispim_link.generate_dispim_link

    Automatically saves process_output.json, which is then copied to
    the S3 bucket/dataset via transfer.copy_to_s3.

    Parameters:
    ----------
    xml_path: str
        Relative path to xml file (bigstitcher format) that
        contains tile position information

    s3_bucket:str
        name of s3 bucket where the dataset lives

    Return:
    -------
    link: str
        Neuroglancer link for xml dataset.

    """
    # read_xml and get dataset prefix for S3.
    # NOTE(review): assumes the dataset path has at least three
    # '/'-separated components (index 2 is the dataset name) — confirm
    # against XmlParser.extract_dataset_path output format.
    dataset_path = XmlParser.extract_dataset_path(xml_path)
    dataset_name = dataset_path.split("/")[2]

    # print(f"dataset_path {dataset_path}")
    # print(f"dataset_name {dataset_name}")

    s3_path = f"s3://{s3_bucket}/{dataset_name}/"

    # Hard-coded Code Ocean results directory.
    output_folder = f"/results/{dataset_name}/"

    # mkdir(exist_ok=True) makes the exists() check redundant but harmless.
    if not pathlib.Path(output_folder).exists():
        pathlib.Path(output_folder).mkdir(parents=True, exist_ok=True)

    link = generate_dispim_link(
        xml_path,
        s3_path,
        max_dr=400,
        opacity=1.0,
        blend="additive",
        output_json_path=output_folder,
    )

    # copy output json to s3 bucket dataset

    transfer.copy_to_s3(output_folder + "process_output.json", s3_path)

    return link