careamics 0.0.4.2__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- careamics/careamist.py +235 -25
- careamics/cli/conf.py +19 -30
- careamics/cli/main.py +111 -10
- careamics/cli/utils.py +29 -0
- careamics/config/__init__.py +2 -0
- careamics/config/architectures/lvae_model.py +104 -21
- careamics/config/configuration_factory.py +49 -45
- careamics/config/configuration_model.py +2 -2
- careamics/config/likelihood_model.py +7 -6
- careamics/config/loss_model.py +56 -0
- careamics/config/nm_model.py +24 -24
- careamics/config/vae_algorithm_model.py +14 -13
- careamics/dataset/dataset_utils/running_stats.py +22 -23
- careamics/lightning/lightning_module.py +58 -27
- careamics/lightning/train_data_module.py +15 -1
- careamics/losses/loss_factory.py +1 -85
- careamics/losses/lvae/losses.py +223 -164
- careamics/lvae_training/calibration.py +184 -0
- careamics/lvae_training/dataset/config.py +2 -2
- careamics/lvae_training/dataset/multich_dataset.py +11 -19
- careamics/lvae_training/dataset/multifile_dataset.py +3 -2
- careamics/lvae_training/dataset/types.py +15 -26
- careamics/lvae_training/dataset/utils/index_manager.py +4 -4
- careamics/lvae_training/eval_utils.py +125 -213
- careamics/model_io/bioimage/_readme_factory.py +25 -33
- careamics/model_io/bioimage/cover_factory.py +171 -0
- careamics/model_io/bioimage/model_description.py +39 -17
- careamics/model_io/bmz_io.py +36 -25
- careamics/models/layers.py +6 -4
- careamics/models/lvae/layers.py +348 -975
- careamics/models/lvae/likelihoods.py +10 -8
- careamics/models/lvae/lvae.py +214 -272
- careamics/models/lvae/noise_models.py +179 -112
- careamics/models/lvae/stochastic.py +393 -0
- careamics/models/lvae/utils.py +82 -73
- careamics/utils/lightning_utils.py +57 -0
- careamics/utils/serializers.py +2 -0
- careamics/utils/torch_utils.py +1 -1
- {careamics-0.0.4.2.dist-info → careamics-0.0.5.dist-info}/METADATA +12 -9
- {careamics-0.0.4.2.dist-info → careamics-0.0.5.dist-info}/RECORD +43 -37
- {careamics-0.0.4.2.dist-info → careamics-0.0.5.dist-info}/WHEEL +1 -1
- {careamics-0.0.4.2.dist-info → careamics-0.0.5.dist-info}/entry_points.txt +0 -0
- {careamics-0.0.4.2.dist-info → careamics-0.0.5.dist-info}/licenses/LICENSE +0 -0
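The bulk of this release is the rework of the LVAE model shown below: LadderVAE.__init__ now takes explicit encoder/decoder convolution strides and a tuple-valued input_shape, which drive the new 2D/3D handling. As a quick orientation before reading the diff, here is a minimal instantiation sketch based only on the new signature visible in the diff; the argument values are illustrative and are not careamics defaults.

    # Sketch only: the signature comes from the new LadderVAE.__init__ in the diff
    # below; the concrete values are illustrative, not careamics defaults.
    from careamics.models.lvae.lvae import LadderVAE

    model = LadderVAE(
        input_shape=(64, 64),          # (Y, X) for 2D data; a (Z, Y, X) tuple selects the 3D path
        output_channels=2,
        multiscale_count=1,            # 1 disables the lateral-context (LC) inputs
        z_dims=[128, 128, 128, 128],   # one latent size per ladder level
        encoder_n_filters=64,
        decoder_n_filters=64,
        encoder_conv_strides=[2, 2],   # 2 entries -> nn.Conv2d, 3 entries -> nn.Conv3d
        decoder_conv_strides=[2, 2],   # 3D encoder + 2D decoder triggers the new "squish" path
        encoder_dropout=0.1,
        decoder_dropout=0.1,
        nonlinearity="ELU",            # passed to get_activation(); accepted names are not shown in this diff
        predict_logvar=None,           # the code only checks "is not None" to size the output layer
        analytical_kl=False,
    )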
careamics/models/lvae/lvae.py
CHANGED
@@ -1,11 +1,12 @@
 """
-Ladder VAE (LVAE) Model
+Ladder VAE (LVAE) Model.
 
-The current implementation is based on "Interpretable Unsupervised Diversity Denoising
+The current implementation is based on "Interpretable Unsupervised Diversity Denoising
+and Artefact Removal, Prakash et al."
 """
 
 from collections.abc import Iterable
-from typing import
+from typing import Union
 
 import numpy as np
 import torch
@@ -17,42 +18,80 @@ from ..activation import get_activation
 from .layers import (
     BottomUpDeterministicResBlock,
     BottomUpLayer,
+    GateLayer,
     TopDownDeterministicResBlock,
     TopDownLayer,
 )
-from .utils import Interpolate, ModelType, crop_img_tensor
+from .utils import Interpolate, ModelType, crop_img_tensor
 
 
 @register_model("LVAE")
 class LadderVAE(nn.Module):
+    """
+    Constructor.
+
+    Parameters
+    ----------
+    input_shape : int
+        The size of the input image.
+    output_channels : int
+        The number of output channels.
+    multiscale_count : int
+        The number of scales for multiscale processing.
+    z_dims : list[int]
+        The dimensions of the latent space for each layer.
+    encoder_n_filters : int
+        The number of filters in the encoder.
+    decoder_n_filters : int
+        The number of filters in the decoder.
+    encoder_conv_strides : list[int]
+        The strides for the conv layers encoder.
+    decoder_conv_strides : list[int]
+        The strides for the conv layers decoder.
+    encoder_dropout : float
+        The dropout rate for the encoder.
+    decoder_dropout : float
+        The dropout rate for the decoder.
+    nonlinearity : str
+        The nonlinearity function to use.
+    predict_logvar : bool
+        Whether to predict the log variance.
+    analytical_kl : bool
+        Whether to use analytical KL divergence.
+
+    Raises
+    ------
+    NotImplementedError
+        If only 2D convolutions are supported.
+    """
 
     def __init__(
         self,
        input_shape: int,
         output_channels: int,
         multiscale_count: int,
-        z_dims:
+        z_dims: list[int],
         encoder_n_filters: int,
         decoder_n_filters: int,
+        encoder_conv_strides: list[int],
+        decoder_conv_strides: list[int],
        encoder_dropout: float,
         decoder_dropout: float,
         nonlinearity: str,
         predict_logvar: bool,
         analytical_kl: bool,
     ):
-        """
-        Constructor.
-
-        Parameters
-        ----------
-
-        """
         super().__init__()
 
         # -------------------------------------------------------
         # Customizable attributes
         self.image_size = input_shape
+        """Input image size. (Z, Y, X) or (Y, X) if the data is 2D."""
+        # TODO: we need to be careful with this since used to be an int.
+        # the tuple of shapes used to be `self.input_shape`.
        self.target_ch = output_channels
+        self.encoder_conv_strides = encoder_conv_strides
+        self.decoder_conv_strides = decoder_conv_strides
         self._multiscale_count = multiscale_count
         self.z_dims = z_dims
         self.encoder_n_filters = encoder_n_filters
@@ -80,7 +119,6 @@ class LadderVAE(nn.Module):
         self.merge_type = "residual"
         self.no_initial_downscaling = True
         self.skip_bottomk_buvalues = 0
-        self.non_stochastic_version = False
         self.stochastic_skip = True
         self.learn_top_prior = True
         self.res_block_type = "bacdbacd" # TODO remove !
@@ -91,9 +129,7 @@ class LadderVAE(nn.Module):
         self._enable_topdown_normalize_factor = True
 
         # Attributes that handle LC -> Hardcoded
-        self.enable_multiscale =
-            self._multiscale_count is not None and self._multiscale_count > 1
-        )
+        self.enable_multiscale = self._multiscale_count > 1
         self.multiscale_retain_spatial_dims = True
         self.multiscale_lowres_separate_branch = False
         self.multiscale_decoder_retain_spatial_dims = (
@@ -102,14 +138,6 @@ class LadderVAE(nn.Module):
 
         # Derived attributes
         self.n_layers = len(self.z_dims)
-        self.encoder_no_padding_mode = (
-            self.encoder_res_block_skip_padding is True
-            and self.encoder_res_block_kernel > 1
-        )
-        self.decoder_no_padding_mode = (
-            self.decoder_res_block_skip_padding is True
-            and self.decoder_res_block_kernel > 1
-        )
 
         # Others...
         self._tethered_to_input = False
@@ -127,19 +155,41 @@ class LadderVAE(nn.Module):
 
         # -------------------------------------------------------
         # Data attributes
-        self.color_ch = 1
-        self.img_shape = (self.image_size, self.image_size)
+        self.color_ch = 1 # TODO for now we only support 1 channel
         self.normalized_input = True
         # -------------------------------------------------------
 
         # -------------------------------------------------------
         # Loss attributes
-        self._restricted_kl = False # HC
         # enabling reconstruction loss on mixed input
         self.mixed_rec_w = 0
         self.nbr_consistency_w = 0
 
         # -------------------------------------------------------
+        # 3D related stuff
+        self._mode_3D = len(self.image_size) == 3 # TODO refac
+        self._model_3D_depth = self.image_size[0] if self._mode_3D else 1
+        self._decoder_mode_3D = len(self.decoder_conv_strides) == 3
+        if self._mode_3D and not self._decoder_mode_3D:
+            assert self._model_3D_depth % 2 == 1, "3D model depth should be odd"
+        assert (
+            self._mode_3D is True or self._decoder_mode_3D is False
+        ), "Decoder cannot be 3D when encoder is 2D"
+        self._squish3d = self._mode_3D and not self._decoder_mode_3D
+        self._3D_squisher = (
+            None
+            if not self._squish3d
+            else nn.ModuleList(
+                [
+                    GateLayer(
+                        channels=self.encoder_n_filters,
+                        conv_strides=self.encoder_conv_strides,
+                    )
+                    for k in range(len(self.z_dims))
+                ]
+            )
+        )
+        # TODO: this bit is in the Ashesh's confusing-hacky style... Can we do better?
 
         # -------------------------------------------------------
         # # Training attributes
@@ -168,6 +218,11 @@ class LadderVAE(nn.Module):
         ### CREATE MODEL BLOCKS
         # First bottom-up layer: change num channels + downsample by factor 2
         # unless we want to prevent this
+        self.encoder_conv_op = getattr(nn, f"Conv{len(self.encoder_conv_strides)}d")
+        # TODO these should be defined for all layers here ?
+        self.decoder_conv_op = getattr(nn, f"Conv{len(self.decoder_conv_strides)}d")
+        # TODO: would be more readable to have a derived parameters to use like
+        # `conv_dims = len(self.encoder_conv_strides)` and then use `Conv{conv_dims}d`
         stride = 1 if self.no_initial_downscaling else 2
         self.first_bottom_up = self.create_first_bottom_up(stride)
 
@@ -191,7 +246,7 @@ class LadderVAE(nn.Module):
 
         # Output layer --> Project to target_ch many channels
         logvar_ch_needed = self.predict_logvar is not None
-        self.output_layer = self.parameter_net =
+        self.output_layer = self.parameter_net = self.decoder_conv_op(
             self.decoder_n_filters,
             self.target_ch * (1 + logvar_ch_needed),
             kernel_size=3,
@@ -205,6 +260,7 @@ class LadderVAE(nn.Module):
         # PSNR computation on validation.
         # self.label1_psnr = RunningPSNR()
         # self.label2_psnr = RunningPSNR()
+        # TODO: did you add this?
 
         # msg =f'[{self.__class__.__name__}] Stoc:{not self.non_stochastic_version} RecMode:{self.reconstruction_mode} TethInput:{self._tethered_to_input}'
         # msg += f' TargetCh: {self.target_ch}'
@@ -217,7 +273,8 @@ class LadderVAE(nn.Module):
         num_res_blocks: int = 1,
     ) -> nn.Sequential:
         """
-
+        Method creates the first bottom-up block of the Encoder.
+
         Its role is to perform a first image compression step.
         It is composed by a sequence of nn.Conv2d + non-linearity +
         BottomUpDeterministicResBlock (1 or more, default is 1).
@@ -225,29 +282,30 @@ class LadderVAE(nn.Module):
         Parameters
         ----------
         init_stride: int
-            The stride used by the
+            The stride used by the intial Conv2d block.
         num_res_blocks: int, optional
-            The number of BottomUpDeterministicResBlocks
+            The number of BottomUpDeterministicResBlocks, default is 1.
         """
+        # From what I got from Ashesh, Z should not be touched in any case.
         nonlin = get_activation(self.nonlin)
-
-
-
-
-
-
-
-
-                else self.encoder_res_block_kernel // 2
-            ),
-            stride=init_stride,
+        conv_block = self.encoder_conv_op(
+            in_channels=self.color_ch,
+            out_channels=self.encoder_n_filters,
+            kernel_size=self.encoder_res_block_kernel,
+            padding=(
+                0
+                if self.encoder_res_block_skip_padding
+                else self.encoder_res_block_kernel // 2
             ),
-
-
+            stride=init_stride,
+        )
+
+        modules = [conv_block, nonlin]
 
         for _ in range(num_res_blocks):
             modules.append(
                 BottomUpDeterministicResBlock(
+                    conv_strides=self.encoder_conv_strides,
                     c_in=self.encoder_n_filters,
                     c_out=self.encoder_n_filters,
                     nonlin=nonlin,
@@ -255,7 +313,6 @@ class LadderVAE(nn.Module):
                     batchnorm=self.bottomup_batchnorm,
                     dropout=self.encoder_dropout,
                     res_block_type=self.res_block_type,
-                    skip_padding=self.encoder_res_block_skip_padding,
                     res_block_kernel=self.encoder_res_block_kernel,
                 )
             )
@@ -264,7 +321,8 @@ class LadderVAE(nn.Module):
 
     def create_bottom_up_layers(self, lowres_separate_branch: bool) -> nn.ModuleList:
         """
-
+        Method creates the stack of bottom-up layers of the Encoder.
+
         that are used to generate the so-called `bu_values`.
 
         NOTE:
@@ -274,8 +332,9 @@ class LadderVAE(nn.Module):
         Parameters
         ----------
         lowres_separate_branch: bool
-            Whether the residual block(s) used for encoding the low-res input are shared
-            not (`True`) with the "same-size" residual block(s) in the
+            Whether the residual block(s) used for encoding the low-res input are shared
+            (`False`) or not (`True`) with the "same-size" residual block(s) in the
+            `BottomUpLayer`'s primary flow.
         """
         multiscale_lowres_size_factor = 1
         nonlin = get_activation(self.nonlin)
@@ -294,11 +353,11 @@ class LadderVAE(nn.Module):
             # N.B. Only used if layer_enable_multiscale == True, so we updated it only in that case
             multiscale_lowres_size_factor *= 1 + int(layer_enable_multiscale)
 
-
-
-
-
-
+            # TODO: check correctness of this
+            if self._multiscale_count > 1:
+                output_expected_shape = (dim // 2 ** (i + 1) for dim in self.image_size)
+            else:
+                output_expected_shape = None
 
             # Add bottom-up deterministic layer at level i.
             # It's a sequence of residual blocks (BottomUpDeterministicResBlock), possibly with downsampling between them.
@@ -308,14 +367,14 @@ class LadderVAE(nn.Module):
                    n_filters=self.encoder_n_filters,
                    downsampling_steps=self.downsample[i],
                    nonlin=nonlin,
+                    conv_strides=self.encoder_conv_strides,
                    batchnorm=self.bottomup_batchnorm,
                    dropout=self.encoder_dropout,
                    res_block_type=self.res_block_type,
                    res_block_kernel=self.encoder_res_block_kernel,
-                    res_block_skip_padding=self.encoder_res_block_skip_padding,
                    gated=self.gated,
                    lowres_separate_branch=lowres_separate_branch,
-                    enable_multiscale=self.enable_multiscale,  # shouldn't the arg be `layer_enable_multiscale` here?
+                    enable_multiscale=self.enable_multiscale,  # TODO: shouldn't the arg be `layer_enable_multiscale` here?
                    multiscale_retain_spatial_dims=self.multiscale_retain_spatial_dims,
                    multiscale_lowres_size_factor=multiscale_lowres_size_factor,
                    decoder_retain_spatial_dims=self.multiscale_decoder_retain_spatial_dims,
@@ -327,7 +386,8 @@ class LadderVAE(nn.Module):
 
     def create_top_down_layers(self) -> nn.ModuleList:
         """
-
+        Method creates the stack of top-down layers of the Decoder.
+
         In these layer the `bu`_values` from the Encoder are merged with the `p_params` from the previous layer
         of the Decoder to get `q_params`. Then, a stochastic layer generates a sample from the latent distribution
         with parameters `q_params`. Finally, this sample is fed through a TopDownDeterministicResBlock to
@@ -346,8 +406,6 @@ class LadderVAE(nn.Module):
         When doing unconditional generation, bu_value is not available. Hence the
         merge layer is not used, and z is sampled directly from p_params.
 
-        Parameters
-        ----------
         """
         top_down_layers = nn.ModuleList([])
         nonlin = get_activation(self.nonlin)
@@ -356,7 +414,7 @@ class LadderVAE(nn.Module):
            # Check if this is the top layer
            is_top = i == self.n_layers - 1
 
-            if self._enable_topdown_normalize_factor:
+            if self._enable_topdown_normalize_factor:  # TODO: What is this?
                normalize_latent_factor = (
                    1 / np.sqrt(2 * (1 + i)) if len(self.z_dims) > 4 else 1.0
                )
@@ -369,7 +427,8 @@ class LadderVAE(nn.Module):
                    n_res_blocks=self.decoder_blocks_per_layer,
                    n_filters=self.decoder_n_filters,
                    is_top_layer=is_top,
-
+                    conv_strides=self.decoder_conv_strides,
+                    upsampling_steps=self.downsample[i],
                    nonlin=nonlin,
                    merge_type=self.merge_type,
                    batchnorm=self.topdown_batchnorm,
@@ -379,17 +438,11 @@ class LadderVAE(nn.Module):
                    top_prior_param_shape=self.get_top_prior_param_shape(),
                    res_block_type=self.res_block_type,
                    res_block_kernel=self.decoder_res_block_kernel,
-                    res_block_skip_padding=self.decoder_res_block_skip_padding,
                    gated=self.gated,
                    analytical_kl=self.analytical_kl,
-                    restricted_kl=self._restricted_kl,
                    vanilla_latent_hw=self.get_latent_spatial_size(i),
-                    # in no_padding_mode, what gets passed from the encoder are not multiples of 2 and so merging operation does not work natively.
-                    bottomup_no_padding_mode=self.encoder_no_padding_mode,
-                    topdown_no_padding_mode=self.decoder_no_padding_mode,
                    retain_spatial_dims=self.multiscale_decoder_retain_spatial_dims,
-
-                    input_image_shape=self.img_shape,
+                    input_image_shape=self.image_size,
                    normalize_latent_factor=normalize_latent_factor,
                    conv2d_bias=self.topdown_conv2d_bias,
                    stochastic_use_naive_exponential=self._stochastic_use_naive_exponential,
@@ -398,8 +451,10 @@ class LadderVAE(nn.Module):
         return top_down_layers
 
     def create_final_topdown_layer(self, upsample: bool) -> nn.Sequential:
-        """
-
+        """Create the final top-down layer of the Decoder.
+
+        NOTE: In this layer, (optional) upsampling is performed by bilinear interpolation
+        instead of transposed convolution (like in other TD layers).
 
         Parameters
         ----------
@@ -419,69 +474,76 @@ class LadderVAE(nn.Module):
                c_in=self.decoder_n_filters,
                c_out=self.decoder_n_filters,
                nonlin=get_activation(self.nonlin),
+                conv_strides=self.decoder_conv_strides,
                batchnorm=self.topdown_batchnorm,
                dropout=self.decoder_dropout,
                res_block_type=self.res_block_type,
                res_block_kernel=self.decoder_res_block_kernel,
-                skip_padding=self.decoder_res_block_skip_padding,
                gated=self.gated,
                conv2d_bias=self.topdown_conv2d_bias,
            )
        )
        return nn.Sequential(*modules)
 
-    def _init_multires(
-        self, config=None
-    ) -> nn.ModuleList:  # TODO config: ml_collections.ConfigDict refactor
+    def _init_multires(self, config=None) -> nn.ModuleList:
        """
-
-
-
-
-
-
-
-
-
-
-
+        Method defines the input block/branch to encode/compress low-res lateral inputs.
+
+        at different hierarchical levels
+        in the multiresolution approach (LC). The role of the input branches is similar
+        to the one of the first bottom-up layer in the primary flow of the Encoder,
+        namely to compress the lateral input image to a degree that is compatible with
+        the one of the primary flow.
+
+        NOTE 1: Each input branch consists of a sequence of Conv2d + non-linearity
+        + BottomUpDeterministicResBlock. It is meaningful to observe that the
+        `BottomUpDeterministicResBlock` shares the same model attributes with the blocks
+        in the primary flow of the Encoder (e.g., c_in, c_out, dropout, etc. etc.).
+        Moreover, it does not perform downsampling.
+
+        NOTE 2: `_multiscale_count` attribute defines the total number of inputs to the
+        bottom-up pass. In other terms if we have the input patch and n_LC additional
+        lateral inputs, we will have a total of (n_LC + 1) inputs.
        """
        stride = 1 if self.no_initial_downscaling else 2
        nonlin = get_activation(self.nonlin)
        if self._multiscale_count is None:
            self._multiscale_count = 1
 
-        msg =
-
+        msg = (
+            f"Multiscale count ({self._multiscale_count}) should not exceed the number"
+            f"of bottom up layers ({self.n_layers}) by more than 1.\n"
+        )
        assert (
            self._multiscale_count <= 1 or self._multiscale_count <= 1 + self.n_layers
-        ), msg
+        ), msg  # TODO how ?
 
        msg = (
-            "
+            "Multiscale approach only supports monocrome images. "
+            f"Found instead color_ch={self.color_ch}."
        )
-        assert self._multiscale_count == 1 or self.color_ch == 1, msg
+        # assert self._multiscale_count == 1 or self.color_ch == 1, msg
 
        lowres_first_bottom_ups = []
        for _ in range(1, self._multiscale_count):
            first_bottom_up = nn.Sequential(
-
+                self.encoder_conv_op(
                    in_channels=self.color_ch,
                    out_channels=self.encoder_n_filters,
                    kernel_size=5,
-                    padding=
+                    padding="same",
                    stride=stride,
                ),
                nonlin,
                BottomUpDeterministicResBlock(
                    c_in=self.encoder_n_filters,
                    c_out=self.encoder_n_filters,
+                    conv_strides=self.encoder_conv_strides,
                    nonlin=nonlin,
                    downsample=False,
                    batchnorm=self.bottomup_batchnorm,
                    dropout=self.encoder_dropout,
                    res_block_type=self.res_block_type,
-                    skip_padding=self.encoder_res_block_skip_padding,
                ),
            )
            lowres_first_bottom_ups.append(first_bottom_up)
@@ -493,10 +555,9 @@ class LadderVAE(nn.Module):
        )
 
    ### SET OF FORWARD-LIKE METHODS
-    def bottomup_pass(self, inp: torch.Tensor) ->
-        """
-
-        """
+    def bottomup_pass(self, inp: torch.Tensor) -> list[torch.Tensor]:
+        """Wrapper of _bottomup_pass()."""
+        # TODO Remove wrapper
        return self._bottomup_pass(
            inp,
            self.first_bottom_up,
@@ -510,9 +571,10 @@ class LadderVAE(nn.Module):
        first_bottom_up: nn.Sequential,
        lowres_first_bottom_ups: nn.ModuleList,
        bottom_up_layers: nn.ModuleList,
-    ) ->
+    ) -> list[torch.Tensor]:
        """
-
+        Method defines the forward pass through the LVAE Encoder, the so-called.
+
        Bottom-Up pass.
 
        Parameters
@@ -541,7 +603,6 @@ class LadderVAE(nn.Module):
            lowres_x = None
            if self._multiscale_count > 1 and i + 1 < inp.shape[1]:
                lowres_x = lowres_first_bottom_ups[i](inp[:, i + 1 : i + 2])
-
            x, bu_value = bottom_up_layers[i](x, lowres_x=lowres_x)
            bu_values.append(bu_value)
 
@@ -549,41 +610,40 @@ class LadderVAE(nn.Module):
 
    def topdown_pass(
        self,
-        bu_values: torch.Tensor = None,
-        n_img_prior: torch.Tensor = None,
-
-
-
-
-
-    ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
+        bu_values: Union[torch.Tensor, None] = None,
+        n_img_prior: Union[torch.Tensor, None] = None,
+        constant_layers: Union[Iterable[int], None] = None,
+        forced_latent: Union[list[torch.Tensor], None] = None,
+        top_down_layers: Union[nn.ModuleList, None] = None,
+        final_top_down_layer: Union[nn.Sequential, None] = None,
+    ) -> tuple[torch.Tensor, dict[str, torch.Tensor]]:
        """
-
+        Method defines the forward pass through the LVAE Decoder, the so-called.
+
        Top-Down pass.
 
        Parameters
        ----------
        bu_values: torch.Tensor, optional
-            Output of the bottom-up pass. It will have values from multiple layers of
+            Output of the bottom-up pass. It will have values from multiple layers of
+            the ladder.
        n_img_prior: optional
-            When `bu_values` is `None`, `n_img_prior` indicates the number of images to
+            When `bu_values` is `None`, `n_img_prior` indicates the number of images to
+            generate
            from the prior (so bottom-up pass is not used at all here).
-        mode_layers: Iterable[int], optional
-            A sequence of indexes associated to the layers in which sampling is disabled and
-            the mode (mean value) is used instead. Set to `None` to avoid this behaviour.
        constant_layers: Iterable[int], optional
-            A sequence of indexes associated to the layers in which a single instance's
-            copied over the entire batch (bottom-up path is not used, so only prior
-            Set to `None` to avoid this behaviour.
-        forced_latent:
-            A list of tensors that are used as fixed latent variables (hence, sampling
-            place in this case).
+            A sequence of indexes associated to the layers in which a single instance's
+            z is copied over the entire batch (bottom-up path is not used, so only prior
+            is used here). Set to `None` to avoid this behaviour.
+        forced_latent: list[torch.Tensor], optional
+            A list of tensors that are used as fixed latent variables (hence, sampling
+            doesn't take place in this case).
        top_down_layers: nn.ModuleList, optional
-            A list of top-down layers to use in the top-down pass. If `None`, the method
-            default layers defined in the constructor.
+            A list of top-down layers to use in the top-down pass. If `None`, the method
+            uses the default layers defined in the constructor.
        final_top_down_layer: nn.Sequential, optional
-            The last top-down layer of the top-down pass. If `None`, the method uses the
-            layers defined in the constructor.
+            The last top-down layer of the top-down pass. If `None`, the method uses the
+            default layers defined in the constructor.
        """
        if top_down_layers is None:
            top_down_layers = self.top_down_layers
@@ -591,11 +651,9 @@ class LadderVAE(nn.Module):
            final_top_down_layer = self.final_top_down
 
        # Default: no layer is sampled from the distribution's mode
-        if mode_layers is None:
-            mode_layers = []
        if constant_layers is None:
            constant_layers = []
-        prior_experiment = len(
+        prior_experiment = len(constant_layers) > 0
 
        # If the bottom-up inference values are not given, don't do
        # inference, sample from prior instead
@@ -608,11 +666,7 @@ class LadderVAE(nn.Module):
                "if and only if we're not doing inference"
            )
            raise RuntimeError(msg)
-        if
-            inference_mode
-            and prior_experiment
-            and (self.non_stochastic_version is False)
-        ):
+        if inference_mode and prior_experiment:
            msg = (
                "Prior experiments (e.g. sampling from mode) are not"
                " compatible with inference mode"
@@ -621,34 +675,24 @@ class LadderVAE(nn.Module):
 
        # Sampled latent variables at each layer
        z = [None] * self.n_layers
-
        # KL divergence of each layer
        kl = [None] * self.n_layers
        # Kl divergence restricted, only for the LC enabled setup denoiSplit.
        kl_restricted = [None] * self.n_layers
-
        # mean from which z is sampled.
        q_mu = [None] * self.n_layers
        # log(var) from which z is sampled.
        q_lv = [None] * self.n_layers
-
        # Spatial map of KL divergence for each layer
        kl_spatial = [None] * self.n_layers
-
        debug_qvar_max = [None] * self.n_layers
-
        kl_channelwise = [None] * self.n_layers
-
        if forced_latent is None:
            forced_latent = [None] * self.n_layers
 
-        # log p(z) where z is the sample in the topdown pass
-        # logprob_p = 0.
-
        # Top-down inference/generation loop
-        out =
+        out = None
        for i in reversed(range(self.n_layers)):
-
            # If available, get deterministic node from bottom-up inference
            try:
                bu_value = bu_values[i]
@@ -656,26 +700,23 @@ class LadderVAE(nn.Module):
                bu_value = None
 
            # Whether the current layer should be sampled from the mode
-            use_mode = i in mode_layers
            constant_out = i in constant_layers
 
            # Input for skip connection
-            skip_input = out
+            skip_input = out
 
            # Full top-down layer, including sampling and deterministic part
-            out,
+            out, aux = top_down_layers[i](
                input_=out,
                skip_connection_input=skip_input,
                inference_mode=inference_mode,
                bu_value=bu_value,
                n_img_prior=n_img_prior,
-                use_mode=use_mode,
                force_constant_output=constant_out,
                forced_latent=forced_latent[i],
                mode_pred=self.mode_pred,
                var_clip_max=self._var_clip_max,
            )
-
            # Save useful variables
            z[i] = aux["z"]  # sampled variable at this layer (batch, ch, h, w)
            kl[i] = aux["kl_samplewise"]  # (batch, )
@@ -708,8 +749,10 @@ class LadderVAE(nn.Module):
        }
        return out, data
 
-    def forward(self, x: torch.Tensor) ->
+    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, dict[str, torch.Tensor]]:
        """
+        Forward pass through the LVAE model.
+
        Parameters
        ----------
        x: torch.Tensor
@@ -717,124 +760,40 @@ class LadderVAE(nn.Module):
        """
        img_size = x.size()[2:]
 
-        # Pad input to size equal to the closest power of 2
-        x_pad = self.pad_input(x)
-
        # Bottom-up inference: return list of length n_layers (bottom to top)
-        bu_values = self.bottomup_pass(
+        bu_values = self.bottomup_pass(x)
        for i in range(0, self.skip_bottomk_buvalues):
            bu_values[i] = None
 
-
+        if self._squish3d:
+            bu_values = [
+                torch.mean(self._3D_squisher[k](bu_value), dim=2)
+                for k, bu_value in enumerate(bu_values)
+            ]
 
        # Top-down inference/generation
-        out, td_data = self.topdown_pass(bu_values
+        out, td_data = self.topdown_pass(bu_values)
 
        if out.shape[-1] > img_size[-1]:
            # Restore original image size
            out = crop_img_tensor(out, img_size)
 
        out = self.output_layer(out)
-        if self._tethered_to_input:
-            assert out.shape[1] == 1
-            ch2 = self.get_other_channel(out, x_pad)
-            out = torch.cat([out, ch2], dim=1)
 
        return out, td_data
 
-    ### SET OF UTILS METHODS
-    # def sample_prior(
-    #     self,
-    #     n_imgs,
-    #     mode_layers=None,
-    #     constant_layers=None
-    # ):
-
-    #     # Generate from prior
-    #     out, _ = self.topdown_pass(n_img_prior=n_imgs, mode_layers=mode_layers, constant_layers=constant_layers)
-    #     out = crop_img_tensor(out, self.img_shape)
-
-    #     # Log likelihood and other info (per data point)
-    #     _, likelihood_data = self.likelihood(out, None)
-
-    #     return likelihood_data['sample']
-
-    # ### ???
-    # def sample_from_q(self, x, masks=None):
-    #     """
-    #     This method performs the bottomup_pass() and samples from the
-    #     obtained distribution.
-    #     """
-    #     img_size = x.size()[2:]
-
-    #     # Pad input to make everything easier with conv strides
-    #     x_pad = self.pad_input(x)
-
-    #     # Bottom-up inference: return list of length n_layers (bottom to top)
-    #     bu_values = self.bottomup_pass(x_pad)
-    #     return self._sample_from_q(bu_values, masks=masks)
-    # ### ???
-
-    # def _sample_from_q(self, bu_values, top_down_layers=None, final_top_down_layer=None, masks=None):
-    #     if top_down_layers is None:
-    #         top_down_layers = self.top_down_layers
-    #     if final_top_down_layer is None:
-    #         final_top_down_layer = self.final_top_down
-    #     if masks is None:
-    #         masks = [None] * len(bu_values)
-
-    #     msg = "Multiscale is not supported as of now. You need the output from the previous layers to do this."
-    #     assert self.n_layers == 1, msg
-    #     samples = []
-    #     for i in reversed(range(self.n_layers)):
-    #         bu_value = bu_values[i]
-
-    #         # Note that the first argument can be set to None since we are just dealing with one level
-    #         sample = top_down_layers[i].sample_from_q(None, bu_value, var_clip_max=self._var_clip_max, mask=masks[i])
-    #         samples.append(sample)
-
-    #     return samples
-
-    def reset_for_different_output_size(self, output_size: int) -> None:
-        """Reset shape of output and latent tensors for different output size.
-
-        Used during evaluation to reset expected shapes of tensors when
-        input/output shape changes.
-        For instance, it is needed when the model was trained on, say, 64x64 sized
-        patches, but prediction is done on 128x128 patches.
-        """
-        for i in range(self.n_layers):
-            sz = output_size // 2 ** (1 + i)
-            self.bottom_up_layers[i].output_expected_shape = (sz, sz)
-            self.top_down_layers[i].latent_shape = (output_size, output_size)
-
-    def pad_input(self, x):
-        """
-        Pads input x so that its sizes are powers of 2
-        :param x:
-        :return: Padded tensor
-        """
-        size = self.get_padded_size(x.size())
-        x = pad_img_tensor(x, size)
-        return x
-
    ### SET OF GETTERS
    def get_padded_size(self, size):
        """
        Returns the smallest size (H, W) of the image with actual size given
        as input, such that H and W are powers of 2.
-        :param size: input size, tuple either (N, C, H,
+        :param size: input size, tuple either (N, C, H, W) or (H, W)
        :return: 2-tuple (H, W)
        """
        # Make size argument into (heigth, width)
-
-
-
-        msg = (
-            "input size must be either (N, C, H, W) or (H, W), but it "
-            f"has length {len(size)} (size={size})"
-        )
-        raise RuntimeError(msg)
+        # assert len(size) in [2, 4, 5] # TODO commented out cuz it's weird
+        # We're only interested in the Y,X dimensions
+        size = size[-2:]
 
        if self.multiscale_decoder_retain_spatial_dims is True:
            # In this case, we can go much more deeper and so this is not required
@@ -845,24 +804,21 @@ class LadderVAE(nn.Module):
        dwnsc = self.overall_downscale_factor
 
        # Output smallest powers of 2 that are larger than current sizes
-        padded_size =
-
+        padded_size = [((s - 1) // dwnsc + 1) * dwnsc for s in size]
+        # TODO Needed for pad/crop odd sizes. Move to dataset?
        return padded_size
 
    def get_latent_spatial_size(self, level_idx: int):
-        """
-        level_idx: 0 is the bottommost layer, the highest resolution one.
-        """
+        """Level_idx: 0 is the bottommost layer, the highest resolution one."""
        actual_downsampling = level_idx + 1
        dwnsc = 2**actual_downsampling
-        sz = self.get_padded_size(self.
+        sz = self.get_padded_size(self.image_size)
        h = sz[0] // dwnsc
        w = sz[1] // dwnsc
        assert h == w
        return h
 
    def get_top_prior_param_shape(self, n_imgs: int = 1):
-        # TODO num channels depends on random variable we're using
 
        # Compute the total downscaling performed in the Encoder
        if self.multiscale_decoder_retain_spatial_dims is False:
@@ -872,26 +828,12 @@ class LadderVAE(nn.Module):
            actual_downsampling = self.n_layers + 1 - self._multiscale_count
        dwnsc = 2**actual_downsampling
 
-
-
-
-
-
+        h = self.image_size[-2] // dwnsc
+        w = self.image_size[-1] // dwnsc
+        mu_logvar = self.z_dims[-1] * 2  # mu and logvar
+        top_layer_shape = (n_imgs, mu_logvar, h, w)
+        # TODO refactor!
+        if self._model_3D_depth > 1 and self._decoder_mode_3D is True:
+            # TODO check if model_3D_depth is needed ?
+            top_layer_shape = (n_imgs, mu_logvar, self._model_3D_depth, h, w)
        return top_layer_shape
-
-    def get_other_channel(self, ch1, input):
-        assert self.data_std["target"].squeeze().shape == (2,)
-        assert self.data_mean["target"].squeeze().shape == (2,)
-        assert self.target_ch == 2
-        ch1_un = (
-            ch1[:, :1] * self.data_std["target"][:, :1]
-            + self.data_mean["target"][:, :1]
-        )
-        input_un = input * self.data_std["input"] + self.data_mean["input"]
-        ch2_un = self._tethered_ch2_scalar * (
-            input_un - ch1_un * self._tethered_ch1_scalar
-        )
-        ch2 = (ch2_un - self.data_mean["target"][:, -1:]) / self.data_std["target"][
-            :, -1:
-        ]
-        return ch2
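For readers following the new 3D handling: when the encoder is 3D but the decoder is 2D (self._squish3d), forward() now passes each bottom-up value through a per-level GateLayer and averages it over the depth axis before the top-down pass. GateLayer lives in careamics/models/lvae/layers.py and its internals are not part of this diff, so the shape-only sketch below uses a stand-in identity gate; only the reduction over the Z dimension is the point.

    # Shape-only sketch of the 3D -> 2D "squish" applied to bu_values in forward().
    # nn.Identity stands in for careamics' GateLayer, whose definition is not in this diff.
    import torch
    import torch.nn as nn

    n_layers, n_filters, depth = 4, 64, 5
    gates = nn.ModuleList([nn.Identity() for _ in range(n_layers)])

    # One bottom-up value per ladder level: (batch, channels, Z, Y, X)
    bu_values = [torch.randn(1, n_filters, depth, 32, 32) for _ in range(n_layers)]

    # Mirrors: torch.mean(self._3D_squisher[k](bu_value), dim=2)
    squished = [torch.mean(gates[k](v), dim=2) for k, v in enumerate(bu_values)]

    print(squished[0].shape)  # torch.Size([1, 64, 32, 32]) -> 2D feature maps for the decoder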