rhapso-0.1.92-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. Rhapso/__init__.py +1 -0
  2. Rhapso/data_prep/__init__.py +2 -0
  3. Rhapso/data_prep/n5_reader.py +188 -0
  4. Rhapso/data_prep/s3_big_stitcher_reader.py +55 -0
  5. Rhapso/data_prep/xml_to_dataframe.py +215 -0
  6. Rhapso/detection/__init__.py +5 -0
  7. Rhapso/detection/advanced_refinement.py +203 -0
  8. Rhapso/detection/difference_of_gaussian.py +324 -0
  9. Rhapso/detection/image_reader.py +117 -0
  10. Rhapso/detection/metadata_builder.py +130 -0
  11. Rhapso/detection/overlap_detection.py +327 -0
  12. Rhapso/detection/points_validation.py +49 -0
  13. Rhapso/detection/save_interest_points.py +265 -0
  14. Rhapso/detection/view_transform_models.py +67 -0
  15. Rhapso/fusion/__init__.py +0 -0
  16. Rhapso/fusion/affine_fusion/__init__.py +2 -0
  17. Rhapso/fusion/affine_fusion/blend.py +289 -0
  18. Rhapso/fusion/affine_fusion/fusion.py +601 -0
  19. Rhapso/fusion/affine_fusion/geometry.py +159 -0
  20. Rhapso/fusion/affine_fusion/io.py +546 -0
  21. Rhapso/fusion/affine_fusion/script_utils.py +111 -0
  22. Rhapso/fusion/affine_fusion/setup.py +4 -0
  23. Rhapso/fusion/affine_fusion_worker.py +234 -0
  24. Rhapso/fusion/multiscale/__init__.py +0 -0
  25. Rhapso/fusion/multiscale/aind_hcr_data_transformation/__init__.py +19 -0
  26. Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/__init__.py +3 -0
  27. Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/czi_to_zarr.py +698 -0
  28. Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/zarr_writer.py +265 -0
  29. Rhapso/fusion/multiscale/aind_hcr_data_transformation/models.py +81 -0
  30. Rhapso/fusion/multiscale/aind_hcr_data_transformation/utils/__init__.py +3 -0
  31. Rhapso/fusion/multiscale/aind_hcr_data_transformation/utils/utils.py +526 -0
  32. Rhapso/fusion/multiscale/aind_hcr_data_transformation/zeiss_job.py +249 -0
  33. Rhapso/fusion/multiscale/aind_z1_radial_correction/__init__.py +21 -0
  34. Rhapso/fusion/multiscale/aind_z1_radial_correction/array_to_zarr.py +257 -0
  35. Rhapso/fusion/multiscale/aind_z1_radial_correction/radial_correction.py +557 -0
  36. Rhapso/fusion/multiscale/aind_z1_radial_correction/run_capsule.py +98 -0
  37. Rhapso/fusion/multiscale/aind_z1_radial_correction/utils/__init__.py +3 -0
  38. Rhapso/fusion/multiscale/aind_z1_radial_correction/utils/utils.py +266 -0
  39. Rhapso/fusion/multiscale/aind_z1_radial_correction/worker.py +89 -0
  40. Rhapso/fusion/multiscale_worker.py +113 -0
  41. Rhapso/fusion/neuroglancer_link_gen/__init__.py +8 -0
  42. Rhapso/fusion/neuroglancer_link_gen/dispim_link.py +235 -0
  43. Rhapso/fusion/neuroglancer_link_gen/exaspim_link.py +127 -0
  44. Rhapso/fusion/neuroglancer_link_gen/hcr_link.py +368 -0
  45. Rhapso/fusion/neuroglancer_link_gen/iSPIM_top.py +47 -0
  46. Rhapso/fusion/neuroglancer_link_gen/link_utils.py +239 -0
  47. Rhapso/fusion/neuroglancer_link_gen/main.py +299 -0
  48. Rhapso/fusion/neuroglancer_link_gen/ng_layer.py +1434 -0
  49. Rhapso/fusion/neuroglancer_link_gen/ng_state.py +1123 -0
  50. Rhapso/fusion/neuroglancer_link_gen/parsers.py +336 -0
  51. Rhapso/fusion/neuroglancer_link_gen/raw_link.py +116 -0
  52. Rhapso/fusion/neuroglancer_link_gen/utils/__init__.py +4 -0
  53. Rhapso/fusion/neuroglancer_link_gen/utils/shader_utils.py +85 -0
  54. Rhapso/fusion/neuroglancer_link_gen/utils/transfer.py +43 -0
  55. Rhapso/fusion/neuroglancer_link_gen/utils/utils.py +303 -0
  56. Rhapso/fusion/neuroglancer_link_gen_worker.py +30 -0
  57. Rhapso/matching/__init__.py +0 -0
  58. Rhapso/matching/load_and_transform_points.py +458 -0
  59. Rhapso/matching/ransac_matching.py +544 -0
  60. Rhapso/matching/save_matches.py +120 -0
  61. Rhapso/matching/xml_parser.py +302 -0
  62. Rhapso/pipelines/__init__.py +0 -0
  63. Rhapso/pipelines/ray/__init__.py +0 -0
  64. Rhapso/pipelines/ray/aws/__init__.py +0 -0
  65. Rhapso/pipelines/ray/aws/alignment_pipeline.py +227 -0
  66. Rhapso/pipelines/ray/aws/config/__init__.py +0 -0
  67. Rhapso/pipelines/ray/evaluation.py +71 -0
  68. Rhapso/pipelines/ray/interest_point_detection.py +137 -0
  69. Rhapso/pipelines/ray/interest_point_matching.py +110 -0
  70. Rhapso/pipelines/ray/local/__init__.py +0 -0
  71. Rhapso/pipelines/ray/local/alignment_pipeline.py +167 -0
  72. Rhapso/pipelines/ray/matching_stats.py +104 -0
  73. Rhapso/pipelines/ray/param/__init__.py +0 -0
  74. Rhapso/pipelines/ray/solver.py +120 -0
  75. Rhapso/pipelines/ray/split_dataset.py +78 -0
  76. Rhapso/solver/__init__.py +0 -0
  77. Rhapso/solver/compute_tiles.py +562 -0
  78. Rhapso/solver/concatenate_models.py +116 -0
  79. Rhapso/solver/connected_graphs.py +111 -0
  80. Rhapso/solver/data_prep.py +181 -0
  81. Rhapso/solver/global_optimization.py +410 -0
  82. Rhapso/solver/model_and_tile_setup.py +109 -0
  83. Rhapso/solver/pre_align_tiles.py +323 -0
  84. Rhapso/solver/save_results.py +97 -0
  85. Rhapso/solver/view_transforms.py +75 -0
  86. Rhapso/solver/xml_to_dataframe_solver.py +213 -0
  87. Rhapso/split_dataset/__init__.py +0 -0
  88. Rhapso/split_dataset/compute_grid_rules.py +78 -0
  89. Rhapso/split_dataset/save_points.py +101 -0
  90. Rhapso/split_dataset/save_xml.py +377 -0
  91. Rhapso/split_dataset/split_images.py +537 -0
  92. Rhapso/split_dataset/xml_to_dataframe_split.py +219 -0
  93. rhapso-0.1.92.dist-info/METADATA +39 -0
  94. rhapso-0.1.92.dist-info/RECORD +101 -0
  95. rhapso-0.1.92.dist-info/WHEEL +5 -0
  96. rhapso-0.1.92.dist-info/licenses/LICENSE +21 -0
  97. rhapso-0.1.92.dist-info/top_level.txt +2 -0
  98. tests/__init__.py +1 -0
  99. tests/test_detection.py +17 -0
  100. tests/test_matching.py +21 -0
  101. tests/test_solving.py +21 -0
@@ -0,0 +1,265 @@
+ """
+ This module defines a class that takes
+ big chunks (aggregations of smaller chunks) from
+ a dask array and writes them to disk in
+ Zarr format.
+ """
+
+ import logging
+ from typing import Generator, Tuple
+
+ import dask.array as da
+ import numpy as np
+ from numpy.typing import ArrayLike
+
+
+ def _get_size(shape: Tuple[int, ...], itemsize: int) -> int:
+     """
+     Return the size of an array with the given shape, in bytes.
+
+     Args:
+         shape: the shape of the array
+         itemsize: number of bytes per array element
+     Returns:
+         the size of the array, in bytes
+     """
+     if any(s <= 0 for s in shape):
+         raise ValueError("shape must be > 0 in all dimensions")
+     return np.prod(shape) * itemsize
+
+
+ def _closer_to_target(
+     shape1: Tuple[int, ...],
+     shape2: Tuple[int, ...],
+     target_bytes: int,
+     itemsize: int,
+ ) -> Tuple[int, ...]:
+     """
+     Given two shapes with the same number of dimensions,
+     return whichever one is closer to target_bytes.
+
+     Args:
+         shape1: the first shape
+         shape2: the second shape
+         target_bytes: the target size for the returned shape
+         itemsize: number of bytes per array element
+     """
+     size1 = float(_get_size(shape1, itemsize))
+     size2 = float(_get_size(shape2, itemsize))
+     if abs(size1 - target_bytes) < abs(size2 - target_bytes):
+         return shape1
+     return shape2
+
+
+ def expand_chunks(  # noqa: C901
+     chunks: Tuple[int, int, int],
+     data_shape: Tuple[int, int, int],
+     target_size: int,
+     itemsize: int,
+     mode: str = "iso",
+ ) -> Tuple[int, int, int]:
+     """
+     Given the shape and chunk size of a pre-chunked 3D array,
+     determine the optimal chunk shape closest to target_size.
+     Expanded chunk dimensions are an integer multiple of
+     the base chunk dimension, to ensure optimal access patterns.
+
+     Args:
+         chunks: the shape of the input array chunks
+         data_shape: the shape of the input array
+         target_size: target chunk size in bytes
+         itemsize: the number of bytes per array element
+         mode: chunking strategy. Must be one of "cycle" or "iso"
+     Returns:
+         the optimal chunk shape
+     """
+     if any(c < 1 for c in chunks):
+         raise ValueError("chunks must be >= 1 for all dimensions")
+     if any(s < 1 for s in data_shape):
+         raise ValueError("data_shape must be >= 1 for all dimensions")
+     if any(c > s for c, s in zip(chunks, data_shape)):
+         raise ValueError(
+             "chunks cannot be larger than data_shape in any dimension"
+         )
+     if target_size <= 0:
+         raise ValueError("target_size must be > 0")
+     if itemsize <= 0:
+         raise ValueError("itemsize must be > 0")
+     if mode == "cycle":
+         # Double one dimension at a time, cycling through the axes,
+         # until the chunk size reaches the target
+         current = np.array(chunks, dtype=np.uint64)
+         prev = current.copy()
+         idx = 0
+         ndims = len(current)
+         while _get_size(current, itemsize) < target_size:
+             prev = current.copy()
+             current[idx % ndims] = min(
+                 data_shape[idx % ndims], current[idx % ndims] * 2
+             )
+             idx += 1
+             if all(c >= s for c, s in zip(current, data_shape)):
+                 break
+         expanded = _closer_to_target(current, prev, target_size, itemsize)
+     elif mode == "iso":
+         # Scale all dimensions by the same integer factor,
+         # clamping each to the data shape
+         initial = np.array(chunks, dtype=np.uint64)
+         current = initial
+         prev = current
+         i = 2
+         while _get_size(current, itemsize) < target_size:
+             prev = current
+             current = initial * i
+             current = (
+                 min(data_shape[0], current[0]),
+                 min(data_shape[1], current[1]),
+                 min(data_shape[2], current[2]),
+             )
+             i += 1
+             if all(c >= s for c, s in zip(current, data_shape)):
+                 break
+         expanded = _closer_to_target(current, prev, target_size, itemsize)
+     else:
+         raise ValueError(f"Invalid mode {mode}")
+
+     return tuple(int(d) for d in expanded)
+
+
+ class BlockedArrayWriter:
+     """
+     Static class to write a lazy array
+     to OME-Zarr in big blocks.
+     """
+
+     @staticmethod
+     def gen_slices(
+         arr_shape: Tuple[int, ...], block_shape: Tuple[int, ...]
+     ) -> Generator:
+         """
+         Generate a series of slices that can be
+         used to traverse an array in blocks of a given shape.
+
+         The method generates tuples of slices, each representing
+         a block of the array. The blocks are generated by
+         iterating over the array in steps of the block
+         shape along each dimension.
+
+         Parameters
+         ----------
+         arr_shape : tuple of int
+             The shape of the array to be sliced.
+         block_shape : tuple of int
+             The desired shape of the blocks. This should be a
+             tuple of integers representing the size of each
+             dimension of the block. The length of `block_shape`
+             must equal the length of `arr_shape`.
+             If the array shape is not divisible by the block
+             shape along a dimension, the last slice
+             along that dimension is truncated.
+
+         Returns
+         -------
+         generator of tuple of slice
+             A generator yielding tuples of slices.
+             Each tuple can be used to index the input array.
+         """
+         if len(arr_shape) != len(block_shape):
+             raise ValueError(
+                 "array shape and block shape have different lengths"
+             )
+
+         def _slice_along_dim(dim: int) -> Generator:
+             """
+             A helper generator that slices along one dimension.
+             """
+             # Base case: if the dimension is beyond
+             # the last one, yield an empty tuple
+             if dim >= len(arr_shape):
+                 yield ()
+             else:
+                 # Iterate over the current dimension in steps of the block size
+                 for i in range(0, arr_shape[dim], block_shape[dim]):
+                     # Calculate the end index for this block
+                     end_i = min(i + block_shape[dim], arr_shape[dim])
+                     # Generate slices for the remaining dimensions
+                     for rest in _slice_along_dim(dim + 1):
+                         yield (slice(i, end_i),) + rest
+
+         # Start slicing along the first dimension
+         return _slice_along_dim(0)
+
+     @staticmethod
+     def store(
+         in_array: da.Array, out_array: ArrayLike, block_shape: tuple
+     ) -> None:
+         """
+         Partition a Dask array into non-overlapping blocks
+         and write them sequentially to a Zarr array. This is
+         meant to reduce the scheduling burden for massive
+         (terabyte-scale) arrays.
+
+         :param in_array: The input Dask array
+         :param out_array: The output array
+         :param block_shape: Tuple of (block_depth, block_height, block_width)
+         """
+         logger = logging.getLogger(__name__)
+
+         # Calculate the total number of blocks for progress tracking
+         total_blocks = 1
+         for arr_dim, block_dim in zip(in_array.shape, block_shape):
+             total_blocks *= (arr_dim + block_dim - 1) // block_dim
+
+         logger.info(
+             f"Writing {total_blocks} blocks (block shape: {block_shape})..."
+         )
+
+         # Iterate through the input array in
+         # steps equal to the block shape dimensions
+         block_idx = 0
+         log_interval = max(1, total_blocks // 10)  # Log ~10 times total
+
+         for sl in BlockedArrayWriter.gen_slices(in_array.shape, block_shape):
+             block = in_array[sl]
+             da.store(
+                 block,
+                 out_array,
+                 regions=sl,
+                 lock=False,
+                 compute=True,
+                 return_stored=False,
+             )
+
+             block_idx += 1
+             if block_idx % log_interval == 0 or block_idx == total_blocks:
+                 progress_pct = (block_idx / total_blocks) * 100
+                 logger.info(
+                     f"Progress: {block_idx}/{total_blocks} blocks "
+                     f"({progress_pct:.1f}%)"
+                 )
+
+     @staticmethod
+     def get_block_shape(arr, target_size_mb=409600, mode="cycle", chunks=None):
+         """
+         Given the shape and chunk size of a pre-chunked
+         array, determine the optimal block shape closest
+         to target_size. Expanded block dimensions are
+         an integer multiple of the chunk dimension
+         to ensure optimal access patterns.
+
+         Args:
+             arr: the input array
+             target_size_mb: target block size in megabytes
+                 (default 409600)
+             mode: chunking strategy. Must be one of "cycle" or "iso"
+             chunks: the base chunk shape to expand; defaults to
+                 the chunking of `arr`
+
+         Returns:
+             the block shape
+         """
+         if chunks is None:
+             if isinstance(arr, da.Array):
+                 chunks = arr.chunksize
+             else:
+                 chunks = arr.chunks
+
+         chunks = chunks[-3:]
+         return expand_chunks(
+             chunks,
+             arr.shape[-3:],
+             target_size_mb * 1024**2,
+             arr.itemsize,
+             mode,
+         )
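For reference, a minimal usage sketch of the writer above; it is not part of the package. The array shape, chunking, target size, and output path are hypothetical, and the zarr.open call assumes the zarr-python 2.x API:

import dask.array as da
import numpy as np
import zarr

# Hypothetical float32 volume, finely chunked by an upstream reader
arr = da.zeros((512, 1024, 1024), chunks=(64, 128, 128), dtype=np.float32)

# Expand the 4 MB base chunk toward ~512 MB. In "iso" mode the chunk
# grows uniformly: (128, 256, 256) is 32 MB, (256, 512, 512) is 256 MB,
# and (320, 640, 640) is ~500 MB, the closest to the target here.
block_shape = BlockedArrayWriter.get_block_shape(
    arr, target_size_mb=512, mode="iso"
)

# Allocate the output store (hypothetical path) and stream the blocks
# into it one at a time
out = zarr.open(
    "example_output.zarr",
    mode="w",
    shape=arr.shape,
    chunks=(64, 128, 128),
    dtype=arr.dtype,
)
BlockedArrayWriter.store(arr, out, block_shape)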
@@ -0,0 +1,81 @@
+ """Helpful models used in the compression job"""
+
+ from enum import Enum
+ from pathlib import Path
+ from typing import List, Optional, Union
+
+ import numpy as np
+ from aind_data_transformation.core import BasicJobSettings
+ from dask import array as da
+ from numcodecs import Blosc
+ from pydantic import Field
+
+ ArrayLike = Union[da.Array, np.ndarray]
+ PathLike = Union[str, Path]
+
+
+ class CompressorName(str, Enum):
+     """Enum for compression algorithms a user can select"""
+
+     BLOSC = Blosc.codec_id
+
+
+ class ZeissJobSettings(BasicJobSettings):
+     """ZeissCompressionJob settings."""
+
+     input_source: PathLike = Field(
+         ...,
+         description="Source of the Zeiss stack data.",
+     )
+     output_directory: PathLike = Field(
+         ...,
+         description="Where to write the data locally.",
+     )
+     s3_location: Optional[str] = None
+     num_of_partitions: int = Field(
+         ...,
+         description=(
+             "This script will generate a list of individual stacks, "
+             "and then partition the list into this number of partitions."
+         ),
+     )
+     partition_to_process: int = Field(
+         ...,
+         description="Which partition of stacks to process.",
+     )
+     compressor_name: CompressorName = Field(
+         default=CompressorName.BLOSC,
+         description="Type of compressor to use.",
+         title="Compressor Name",
+     )
+     # It would be safer if these kwargs fields were objects with known schemas
+     compressor_kwargs: dict = Field(
+         default={"cname": "zstd", "clevel": 3, "shuffle": Blosc.SHUFFLE},
+         description="Arguments to be used for the compressor.",
+         title="Compressor Kwargs",
+     )
+     compress_job_save_kwargs: dict = Field(
+         default={"n_jobs": -1},  # -1 to use all available CPU cores
+         description="Arguments for the recording save method.",
+         title="Compress Job Save Kwargs",
+     )
+     chunk_size: List[int] = Field(
+         default=[128, 128, 128],
+         description="Chunk size per axis, a list of three integers",
+         title="Chunk Size",
+     )
+     scale_factor: List[int] = Field(
+         default=[2, 2, 2],
+         description="Scale factor per axis, a list of three integers",
+         title="Scale Factors",
+     )
+     downsample_levels: int = Field(
+         default=4,
+         description="The number of levels of the image pyramid",
+         title="Downsample Levels",
+     )
+     target_size_mb: int = Field(
+         default=19200,
+         description="Target size of data to pull from the CZI file into Zarr",
+         title="Target Size",
+     )
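For orientation, a minimal sketch of constructing these settings. The paths and partition values are hypothetical, and it assumes the fields shown are the only required ones inherited from BasicJobSettings in your installed version:

settings = ZeissJobSettings(
    input_source="/data/zeiss/stacks",     # hypothetical path
    output_directory="/scratch/zarr_out",  # hypothetical path
    num_of_partitions=8,
    partition_to_process=0,
)

# Everything else falls back to the defaults declared above: Blosc/zstd
# at clevel 3, 128x128x128 chunks, 2x2x2 scale factors, and a
# four-level image pyramid.
print(settings.compressor_name.value)  # "blosc"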
@@ -0,0 +1,3 @@
+ """
+ Init functions
+ """