rhapso-0.1.92-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. Rhapso/__init__.py +1 -0
  2. Rhapso/data_prep/__init__.py +2 -0
  3. Rhapso/data_prep/n5_reader.py +188 -0
  4. Rhapso/data_prep/s3_big_stitcher_reader.py +55 -0
  5. Rhapso/data_prep/xml_to_dataframe.py +215 -0
  6. Rhapso/detection/__init__.py +5 -0
  7. Rhapso/detection/advanced_refinement.py +203 -0
  8. Rhapso/detection/difference_of_gaussian.py +324 -0
  9. Rhapso/detection/image_reader.py +117 -0
  10. Rhapso/detection/metadata_builder.py +130 -0
  11. Rhapso/detection/overlap_detection.py +327 -0
  12. Rhapso/detection/points_validation.py +49 -0
  13. Rhapso/detection/save_interest_points.py +265 -0
  14. Rhapso/detection/view_transform_models.py +67 -0
  15. Rhapso/fusion/__init__.py +0 -0
  16. Rhapso/fusion/affine_fusion/__init__.py +2 -0
  17. Rhapso/fusion/affine_fusion/blend.py +289 -0
  18. Rhapso/fusion/affine_fusion/fusion.py +601 -0
  19. Rhapso/fusion/affine_fusion/geometry.py +159 -0
  20. Rhapso/fusion/affine_fusion/io.py +546 -0
  21. Rhapso/fusion/affine_fusion/script_utils.py +111 -0
  22. Rhapso/fusion/affine_fusion/setup.py +4 -0
  23. Rhapso/fusion/affine_fusion_worker.py +234 -0
  24. Rhapso/fusion/multiscale/__init__.py +0 -0
  25. Rhapso/fusion/multiscale/aind_hcr_data_transformation/__init__.py +19 -0
  26. Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/__init__.py +3 -0
  27. Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/czi_to_zarr.py +698 -0
  28. Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/zarr_writer.py +265 -0
  29. Rhapso/fusion/multiscale/aind_hcr_data_transformation/models.py +81 -0
  30. Rhapso/fusion/multiscale/aind_hcr_data_transformation/utils/__init__.py +3 -0
  31. Rhapso/fusion/multiscale/aind_hcr_data_transformation/utils/utils.py +526 -0
  32. Rhapso/fusion/multiscale/aind_hcr_data_transformation/zeiss_job.py +249 -0
  33. Rhapso/fusion/multiscale/aind_z1_radial_correction/__init__.py +21 -0
  34. Rhapso/fusion/multiscale/aind_z1_radial_correction/array_to_zarr.py +257 -0
  35. Rhapso/fusion/multiscale/aind_z1_radial_correction/radial_correction.py +557 -0
  36. Rhapso/fusion/multiscale/aind_z1_radial_correction/run_capsule.py +98 -0
  37. Rhapso/fusion/multiscale/aind_z1_radial_correction/utils/__init__.py +3 -0
  38. Rhapso/fusion/multiscale/aind_z1_radial_correction/utils/utils.py +266 -0
  39. Rhapso/fusion/multiscale/aind_z1_radial_correction/worker.py +89 -0
  40. Rhapso/fusion/multiscale_worker.py +113 -0
  41. Rhapso/fusion/neuroglancer_link_gen/__init__.py +8 -0
  42. Rhapso/fusion/neuroglancer_link_gen/dispim_link.py +235 -0
  43. Rhapso/fusion/neuroglancer_link_gen/exaspim_link.py +127 -0
  44. Rhapso/fusion/neuroglancer_link_gen/hcr_link.py +368 -0
  45. Rhapso/fusion/neuroglancer_link_gen/iSPIM_top.py +47 -0
  46. Rhapso/fusion/neuroglancer_link_gen/link_utils.py +239 -0
  47. Rhapso/fusion/neuroglancer_link_gen/main.py +299 -0
  48. Rhapso/fusion/neuroglancer_link_gen/ng_layer.py +1434 -0
  49. Rhapso/fusion/neuroglancer_link_gen/ng_state.py +1123 -0
  50. Rhapso/fusion/neuroglancer_link_gen/parsers.py +336 -0
  51. Rhapso/fusion/neuroglancer_link_gen/raw_link.py +116 -0
  52. Rhapso/fusion/neuroglancer_link_gen/utils/__init__.py +4 -0
  53. Rhapso/fusion/neuroglancer_link_gen/utils/shader_utils.py +85 -0
  54. Rhapso/fusion/neuroglancer_link_gen/utils/transfer.py +43 -0
  55. Rhapso/fusion/neuroglancer_link_gen/utils/utils.py +303 -0
  56. Rhapso/fusion/neuroglancer_link_gen_worker.py +30 -0
  57. Rhapso/matching/__init__.py +0 -0
  58. Rhapso/matching/load_and_transform_points.py +458 -0
  59. Rhapso/matching/ransac_matching.py +544 -0
  60. Rhapso/matching/save_matches.py +120 -0
  61. Rhapso/matching/xml_parser.py +302 -0
  62. Rhapso/pipelines/__init__.py +0 -0
  63. Rhapso/pipelines/ray/__init__.py +0 -0
  64. Rhapso/pipelines/ray/aws/__init__.py +0 -0
  65. Rhapso/pipelines/ray/aws/alignment_pipeline.py +227 -0
  66. Rhapso/pipelines/ray/aws/config/__init__.py +0 -0
  67. Rhapso/pipelines/ray/evaluation.py +71 -0
  68. Rhapso/pipelines/ray/interest_point_detection.py +137 -0
  69. Rhapso/pipelines/ray/interest_point_matching.py +110 -0
  70. Rhapso/pipelines/ray/local/__init__.py +0 -0
  71. Rhapso/pipelines/ray/local/alignment_pipeline.py +167 -0
  72. Rhapso/pipelines/ray/matching_stats.py +104 -0
  73. Rhapso/pipelines/ray/param/__init__.py +0 -0
  74. Rhapso/pipelines/ray/solver.py +120 -0
  75. Rhapso/pipelines/ray/split_dataset.py +78 -0
  76. Rhapso/solver/__init__.py +0 -0
  77. Rhapso/solver/compute_tiles.py +562 -0
  78. Rhapso/solver/concatenate_models.py +116 -0
  79. Rhapso/solver/connected_graphs.py +111 -0
  80. Rhapso/solver/data_prep.py +181 -0
  81. Rhapso/solver/global_optimization.py +410 -0
  82. Rhapso/solver/model_and_tile_setup.py +109 -0
  83. Rhapso/solver/pre_align_tiles.py +323 -0
  84. Rhapso/solver/save_results.py +97 -0
  85. Rhapso/solver/view_transforms.py +75 -0
  86. Rhapso/solver/xml_to_dataframe_solver.py +213 -0
  87. Rhapso/split_dataset/__init__.py +0 -0
  88. Rhapso/split_dataset/compute_grid_rules.py +78 -0
  89. Rhapso/split_dataset/save_points.py +101 -0
  90. Rhapso/split_dataset/save_xml.py +377 -0
  91. Rhapso/split_dataset/split_images.py +537 -0
  92. Rhapso/split_dataset/xml_to_dataframe_split.py +219 -0
  93. rhapso-0.1.92.dist-info/METADATA +39 -0
  94. rhapso-0.1.92.dist-info/RECORD +101 -0
  95. rhapso-0.1.92.dist-info/WHEEL +5 -0
  96. rhapso-0.1.92.dist-info/licenses/LICENSE +21 -0
  97. rhapso-0.1.92.dist-info/top_level.txt +2 -0
  98. tests/__init__.py +1 -0
  99. tests/test_detection.py +17 -0
  100. tests/test_matching.py +21 -0
  101. tests/test_solving.py +21 -0
@@ -0,0 +1,265 @@
+ """
+ This module defines a class that takes
+ big chunks (aggregations of smaller chunks) from
+ a dask array and writes them to disk in
+ Zarr format.
+ """
+
+ import logging
+ from typing import Generator, Tuple
+
+ import dask.array as da
+ import numpy as np
+ from numpy.typing import ArrayLike
+
+
+ def _get_size(shape: Tuple[int, ...], itemsize: int) -> int:
+     """
+     Return the size of an array with the given shape, in bytes.
+
+     Args:
+         shape: the shape of the array
+         itemsize: number of bytes per array element
+     Returns:
+         the size of the array, in bytes
+     """
+     if any(s <= 0 for s in shape):
+         raise ValueError("shape must be > 0 in all dimensions")
+     return np.prod(shape) * itemsize
+
+
+ def _closer_to_target(
+     shape1: Tuple[int, ...],
+     shape2: Tuple[int, ...],
+     target_bytes: int,
+     itemsize: int,
+ ) -> Tuple[int, ...]:
+     """
+     Given two shapes with the same number of dimensions,
+     return whichever one is closer to target_bytes.
+
+     Args:
+         shape1: the first shape
+         shape2: the second shape
+         target_bytes: the target size for the returned shape
+         itemsize: number of bytes per array element
+     """
+     size1 = float(_get_size(shape1, itemsize))
+     size2 = float(_get_size(shape2, itemsize))
+     if abs(size1 - target_bytes) < abs(size2 - target_bytes):
+         return shape1
+     return shape2
+
+
+ def expand_chunks(  # noqa: C901
+     chunks: Tuple[int, int, int],
+     data_shape: Tuple[int, int, int],
+     target_size: int,
+     itemsize: int,
+     mode: str = "iso",
+ ) -> Tuple[int, int, int]:
+     """
+     Given the shape and chunk size of a pre-chunked 3D array,
+     determine the optimal chunk shape closest to target_size.
+     Expanded chunk dimensions are an integer multiple of
+     the base chunk dimension, to ensure optimal access patterns.
+
+     Args:
+         chunks: the shape of the input array chunks
+         data_shape: the shape of the input array
+         target_size: target chunk size in bytes
+         itemsize: the number of bytes per array element
+         mode: chunking strategy. Must be one of "cycle" or "iso"
+     Returns:
+         the optimal chunk shape
+     """
+     if any(c < 1 for c in chunks):
+         raise ValueError("chunks must be >= 1 for all dimensions")
+     if any(s < 1 for s in data_shape):
+         raise ValueError("data_shape must be >= 1 for all dimensions")
+     if any(c > s for c, s in zip(chunks, data_shape)):
+         raise ValueError(
+             "chunks cannot be larger than data_shape in any dimension"
+         )
+     if target_size <= 0:
+         raise ValueError("target_size must be > 0")
+     if itemsize <= 0:
+         raise ValueError("itemsize must be > 0")
+     if mode == "cycle":
+         # Double one dimension at a time, cycling through the axes,
+         # until the chunk size reaches the target
+         current = np.array(chunks, dtype=np.uint64)
+         prev = current.copy()
+         idx = 0
+         ndims = len(current)
+         while _get_size(current, itemsize) < target_size:
+             prev = current.copy()
+             current[idx % ndims] = min(
+                 data_shape[idx % ndims], current[idx % ndims] * 2
+             )
+             idx += 1
+             if all(c >= s for c, s in zip(current, data_shape)):
+                 break
+         expanded = _closer_to_target(current, prev, target_size, itemsize)
+     elif mode == "iso":
+         # Scale all dimensions by the same integer factor,
+         # clamping each to the data shape
+         initial = np.array(chunks, dtype=np.uint64)
+         current = initial
+         prev = current
+         i = 2
+         while _get_size(current, itemsize) < target_size:
+             prev = current
+             current = initial * i
+             current = (
+                 min(data_shape[0], current[0]),
+                 min(data_shape[1], current[1]),
+                 min(data_shape[2], current[2]),
+             )
+             i += 1
+             if all(c >= s for c, s in zip(current, data_shape)):
+                 break
+         expanded = _closer_to_target(current, prev, target_size, itemsize)
+     else:
+         raise ValueError(f"Invalid mode {mode}")
+
+     return tuple(int(d) for d in expanded)
+
+
+ class BlockedArrayWriter:
+     """
+     Static class to write a lazy array
+     to OME-Zarr in big blocks.
+     """
+
+     @staticmethod
+     def gen_slices(
+         arr_shape: Tuple[int, ...], block_shape: Tuple[int, ...]
+     ) -> Generator:
+         """
+         Generate a series of slices that can be
+         used to traverse an array in blocks of a given shape.
+
+         The method generates tuples of slices, each representing
+         a block of the array. The blocks are generated by
+         iterating over the array in steps of the block
+         shape along each dimension.
+
+         Parameters
+         ----------
+         arr_shape : tuple of int
+             The shape of the array to be sliced.
+         block_shape : tuple of int
+             The desired shape of the blocks. This should be a
+             tuple of integers representing the size of each
+             dimension of the block. The length of `block_shape`
+             must equal the length of `arr_shape`.
+             If the array shape is not divisible by the block
+             shape along a dimension, the last slice
+             along that dimension is truncated.
+
+         Returns
+         -------
+         generator of tuple of slice
+             A generator yielding tuples of slices.
+             Each tuple can be used to index the input array.
+         """
+         if len(arr_shape) != len(block_shape):
+             raise ValueError(
+                 "array shape and block shape have different lengths"
+             )
+
+         def _slice_along_dim(dim: int) -> Generator:
+             """
+             A helper generator that slices along one dimension.
+             """
+             # Base case: if the dimension is beyond
+             # the last one, yield an empty tuple
+             if dim >= len(arr_shape):
+                 yield ()
+             else:
+                 # Iterate over the current dimension in steps of the block size
+                 for i in range(0, arr_shape[dim], block_shape[dim]):
+                     # Calculate the end index for this block
+                     end_i = min(i + block_shape[dim], arr_shape[dim])
+                     # Generate slices for the remaining dimensions
+                     for rest in _slice_along_dim(dim + 1):
+                         yield (slice(i, end_i),) + rest
+
+         # Start slicing along the first dimension
+         return _slice_along_dim(0)
+
+     @staticmethod
+     def store(
+         in_array: da.Array, out_array: ArrayLike, block_shape: tuple
+     ) -> None:
+         """
+         Partition a Dask array into non-overlapping blocks
+         and write them sequentially to a Zarr array. This is
+         meant to reduce the scheduling burden for massive
+         (terabyte-scale) arrays.
+
+         :param in_array: The input Dask array
+         :param out_array: The output array
+         :param block_shape: Tuple of (block_depth, block_height, block_width)
+         """
+         logger = logging.getLogger(__name__)
+
+         # Calculate the total number of blocks for progress tracking
+         total_blocks = 1
+         for arr_dim, block_dim in zip(in_array.shape, block_shape):
+             total_blocks *= (arr_dim + block_dim - 1) // block_dim
+
+         logger.info(
+             f"Writing {total_blocks} blocks (block shape: {block_shape})..."
+         )
+
+         # Iterate through the input array in
+         # steps equal to the block shape dimensions
+         block_idx = 0
+         log_interval = max(1, total_blocks // 10)  # Log ~10 times total
+
+         for sl in BlockedArrayWriter.gen_slices(in_array.shape, block_shape):
+             block = in_array[sl]
+             da.store(
+                 block,
+                 out_array,
+                 regions=sl,
+                 lock=False,
+                 compute=True,
+                 return_stored=False,
+             )
+
+             block_idx += 1
+             if block_idx % log_interval == 0 or block_idx == total_blocks:
+                 progress_pct = (block_idx / total_blocks) * 100
+                 logger.info(
+                     f"Progress: {block_idx}/{total_blocks} blocks "
+                     f"({progress_pct:.1f}%)"
+                 )
+
+     @staticmethod
+     def get_block_shape(arr, target_size_mb=409600, mode="cycle", chunks=None):
+         """
+         Given the shape and chunk size of a pre-chunked
+         array, determine the optimal block shape closest
+         to target_size. Expanded block dimensions are
+         an integer multiple of the chunk dimension
+         to ensure optimal access patterns.
+
+         Args:
+             arr: the input array
+             target_size_mb: target block size in megabytes
+                 (default 409600)
+             mode: chunking strategy. Must be one of "cycle" or "iso"
+             chunks: the base chunk shape to expand; defaults to
+                 the chunking of `arr`
+
+         Returns:
+             the block shape
+         """
+         if chunks is None:
+             if isinstance(arr, da.Array):
+                 chunks = arr.chunksize
+             else:
+                 chunks = arr.chunks
+
+         chunks = chunks[-3:]
+         return expand_chunks(
+             chunks,
+             arr.shape[-3:],
+             target_size_mb * 1024**2,
+             arr.itemsize,
+             mode,
+         )
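For reference, a minimal usage sketch of the writer above; it is not part of the package. The array shape, chunking, target size, and output path are hypothetical, and the zarr.open call assumes the zarr-python 2.x API:

import dask.array as da
import numpy as np
import zarr

# Hypothetical float32 volume, finely chunked by an upstream reader
arr = da.zeros((512, 1024, 1024), chunks=(64, 128, 128), dtype=np.float32)

# Expand the 4 MB base chunk toward ~512 MB. In "iso" mode the chunk
# grows uniformly: (128, 256, 256) is 32 MB, (256, 512, 512) is 256 MB,
# and (320, 640, 640) is ~500 MB, the closest to the target here.
block_shape = BlockedArrayWriter.get_block_shape(
    arr, target_size_mb=512, mode="iso"
)

# Allocate the output store (hypothetical path) and stream the blocks
# into it one at a time
out = zarr.open(
    "example_output.zarr",
    mode="w",
    shape=arr.shape,
    chunks=(64, 128, 128),
    dtype=arr.dtype,
)
BlockedArrayWriter.store(arr, out, block_shape)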
@@ -0,0 +1,81 @@
+ """Helpful models used in the compression job"""
+
+ from enum import Enum
+ from pathlib import Path
+ from typing import List, Optional, Union
+
+ import numpy as np
+ from aind_data_transformation.core import BasicJobSettings
+ from dask import array as da
+ from numcodecs import Blosc
+ from pydantic import Field
+
+ ArrayLike = Union[da.Array, np.ndarray]
+ PathLike = Union[str, Path]
+
+
+ class CompressorName(str, Enum):
+     """Enum for compression algorithms a user can select"""
+
+     BLOSC = Blosc.codec_id
+
+
+ class ZeissJobSettings(BasicJobSettings):
+     """ZeissCompressionJob settings."""
+
+     input_source: PathLike = Field(
+         ...,
+         description="Source of the Zeiss stack data.",
+     )
+     output_directory: PathLike = Field(
+         ...,
+         description="Where to write the data locally.",
+     )
+     s3_location: Optional[str] = None
+     num_of_partitions: int = Field(
+         ...,
+         description=(
+             "This script will generate a list of individual stacks, "
+             "and then partition the list into this number of partitions."
+         ),
+     )
+     partition_to_process: int = Field(
+         ...,
+         description="Which partition of stacks to process.",
+     )
+     compressor_name: CompressorName = Field(
+         default=CompressorName.BLOSC,
+         description="Type of compressor to use.",
+         title="Compressor Name",
+     )
+     # It would be safer if these kwargs fields were objects with known schemas
+     compressor_kwargs: dict = Field(
+         default={"cname": "zstd", "clevel": 3, "shuffle": Blosc.SHUFFLE},
+         description="Arguments to be used for the compressor.",
+         title="Compressor Kwargs",
+     )
+     compress_job_save_kwargs: dict = Field(
+         default={"n_jobs": -1},  # -1 to use all available CPU cores
+         description="Arguments for the recording save method.",
+         title="Compress Job Save Kwargs",
+     )
+     chunk_size: List[int] = Field(
+         default=[128, 128, 128],
+         description="Chunk size per axis, a list of three integers",
+         title="Chunk Size",
+     )
+     scale_factor: List[int] = Field(
+         default=[2, 2, 2],
+         description="Scale factor per axis, a list of three integers",
+         title="Scale Factors",
+     )
+     downsample_levels: int = Field(
+         default=4,
+         description="The number of levels of the image pyramid",
+         title="Downsample Levels",
+     )
+     target_size_mb: int = Field(
+         default=19200,
+         description="Target size of data to pull from the CZI file into Zarr",
+         title="Target Size",
+     )
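For orientation, a minimal sketch of constructing these settings. The paths and partition values are hypothetical, and it assumes the fields shown are the only required ones inherited from BasicJobSettings in your installed version:

settings = ZeissJobSettings(
    input_source="/data/zeiss/stacks",     # hypothetical path
    output_directory="/scratch/zarr_out",  # hypothetical path
    num_of_partitions=8,
    partition_to_process=0,
)

# Everything else falls back to the defaults declared above: Blosc/zstd
# at clevel 3, 128x128x128 chunks, 2x2x2 scale factors, and a
# four-level image pyramid.
print(settings.compressor_name.value)  # "blosc"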
@@ -0,0 +1,3 @@
+ """
+ Init functions
+ """