monai-weekly 1.4.dev2428__py3-none-any.whl → 1.4.dev2430__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monai/__init__.py +1 -1
- monai/_version.py +3 -3
- monai/apps/auto3dseg/hpo_gen.py +1 -1
- monai/apps/detection/utils/anchor_utils.py +2 -2
- monai/apps/pathology/transforms/post/array.py +7 -4
- monai/auto3dseg/analyzer.py +1 -1
- monai/bundle/scripts.py +204 -22
- monai/bundle/utils.py +1 -0
- monai/data/dataset_summary.py +1 -0
- monai/data/meta_tensor.py +2 -2
- monai/data/test_time_augmentation.py +2 -0
- monai/data/utils.py +9 -6
- monai/data/wsi_reader.py +2 -2
- monai/engines/__init__.py +3 -1
- monai/engines/trainer.py +281 -2
- monai/engines/utils.py +76 -1
- monai/handlers/mlflow_handler.py +21 -4
- monai/inferers/__init__.py +5 -0
- monai/inferers/inferer.py +1279 -1
- monai/metrics/cumulative_average.py +2 -0
- monai/metrics/panoptic_quality.py +1 -1
- monai/metrics/rocauc.py +2 -2
- monai/networks/blocks/__init__.py +3 -0
- monai/networks/blocks/attention_utils.py +128 -0
- monai/networks/blocks/crossattention.py +168 -0
- monai/networks/blocks/rel_pos_embedding.py +56 -0
- monai/networks/blocks/selfattention.py +74 -5
- monai/networks/blocks/spade_norm.py +95 -0
- monai/networks/blocks/spatialattention.py +82 -0
- monai/networks/blocks/transformerblock.py +25 -4
- monai/networks/blocks/upsample.py +22 -10
- monai/networks/layers/__init__.py +2 -1
- monai/networks/layers/factories.py +12 -1
- monai/networks/layers/simplelayers.py +1 -1
- monai/networks/layers/utils.py +14 -1
- monai/networks/layers/vector_quantizer.py +233 -0
- monai/networks/nets/__init__.py +9 -0
- monai/networks/nets/autoencoderkl.py +702 -0
- monai/networks/nets/controlnet.py +465 -0
- monai/networks/nets/diffusion_model_unet.py +1913 -0
- monai/networks/nets/patchgan_discriminator.py +230 -0
- monai/networks/nets/quicknat.py +8 -6
- monai/networks/nets/resnet.py +3 -4
- monai/networks/nets/spade_autoencoderkl.py +480 -0
- monai/networks/nets/spade_diffusion_model_unet.py +934 -0
- monai/networks/nets/spade_network.py +435 -0
- monai/networks/nets/swin_unetr.py +4 -3
- monai/networks/nets/transformer.py +157 -0
- monai/networks/nets/vqvae.py +472 -0
- monai/networks/schedulers/__init__.py +17 -0
- monai/networks/schedulers/ddim.py +294 -0
- monai/networks/schedulers/ddpm.py +250 -0
- monai/networks/schedulers/pndm.py +316 -0
- monai/networks/schedulers/scheduler.py +205 -0
- monai/networks/utils.py +22 -0
- monai/transforms/croppad/array.py +8 -8
- monai/transforms/croppad/dictionary.py +4 -4
- monai/transforms/croppad/functional.py +1 -1
- monai/transforms/regularization/array.py +4 -0
- monai/transforms/spatial/array.py +1 -1
- monai/transforms/utils_create_transform_ims.py +2 -4
- monai/utils/__init__.py +1 -0
- monai/utils/misc.py +5 -4
- monai/utils/ordering.py +207 -0
- monai/visualize/class_activation_maps.py +5 -5
- monai/visualize/img2tensorboard.py +3 -1
- {monai_weekly-1.4.dev2428.dist-info → monai_weekly-1.4.dev2430.dist-info}/METADATA +1 -1
- {monai_weekly-1.4.dev2428.dist-info → monai_weekly-1.4.dev2430.dist-info}/RECORD +71 -50
- {monai_weekly-1.4.dev2428.dist-info → monai_weekly-1.4.dev2430.dist-info}/WHEEL +1 -1
- {monai_weekly-1.4.dev2428.dist-info → monai_weekly-1.4.dev2430.dist-info}/LICENSE +0 -0
- {monai_weekly-1.4.dev2428.dist-info → monai_weekly-1.4.dev2430.dist-info}/top_level.txt +0 -0
monai/metrics/cumulative_average.py
CHANGED
@@ -65,6 +65,7 @@ class CumulativeAverage:
         if self.val is None:
             return 0
 
+        val: NdarrayOrTensor
         val = self.val.clone()
         val[~torch.isfinite(val)] = 0
 
@@ -96,6 +97,7 @@ class CumulativeAverage:
             dist.all_reduce(sum)
             dist.all_reduce(count)
 
+        val: NdarrayOrTensor
         val = torch.where(count > 0, sum / count, sum)
 
         if to_numpy:
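The hunk above only adds `NdarrayOrTensor` annotations inside `get_current` and `aggregate`. For orientation, a minimal usage sketch of the `CumulativeAverage` API those methods belong to (assuming the standard `monai.metrics.CumulativeAverage` interface; values are illustrative):

```python
import torch
from monai.metrics import CumulativeAverage

avg = CumulativeAverage()
avg.append(torch.tensor(0.8))           # per-iteration value
avg.append(torch.tensor(0.6), count=2)  # weighted contribution
print(avg.get_current())  # most recently appended value (numpy by default)
print(avg.aggregate())    # overall average: (0.8 + 0.6 * 2) / 3 ≈ 0.667
```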
monai/metrics/panoptic_quality.py
CHANGED
@@ -274,7 +274,7 @@ def _get_paired_iou(
 
         return paired_iou, paired_true, paired_pred
 
-    pairwise_iou = pairwise_iou.cpu().numpy()
+    pairwise_iou = pairwise_iou.cpu().numpy()  # type: ignore[assignment]
    paired_true, paired_pred = linear_sum_assignment(-pairwise_iou)
     paired_iou = pairwise_iou[paired_true, paired_pred]
     paired_true = torch.as_tensor(list(paired_true[paired_iou > match_iou_threshold] + 1), device=device)
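The changed line feeds the IoU matrix to SciPy's optimal-assignment solver; the `# type: ignore` only acknowledges the tensor-to-ndarray reassignment. A small standalone sketch of that matching step (toy IoU values, not taken from the diff):

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

# maximise total IoU by minimising its negation (Hungarian / optimal assignment)
pairwise_iou = np.array([[0.9, 0.1], [0.2, 0.7]])
paired_true, paired_pred = linear_sum_assignment(-pairwise_iou)
print(paired_true, paired_pred)                # [0 1] [0 1]
print(pairwise_iou[paired_true, paired_pred])  # [0.9 0.7]
```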
monai/metrics/rocauc.py
CHANGED
@@ -88,8 +88,8 @@ def _calculate(y_pred: torch.Tensor, y: torch.Tensor) -> float:
 
     n = len(y)
     indices = y_pred.argsort()
-    y = y[indices].cpu().numpy()
-    y_pred = y_pred[indices].cpu().numpy()
+    y = y[indices].cpu().numpy()  # type: ignore[assignment]
+    y_pred = y_pred[indices].cpu().numpy()  # type: ignore[assignment]
     nneg = auc = tmp_pos = tmp_neg = 0.0
 
     for i in range(n):
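`_calculate` is the internal helper behind MONAI's public ROC AUC metric; the change only appeases the type checker. A toy call through the public entry point (assuming `monai.metrics.compute_roc_auc` with its usual `(y_pred, y)` signature):

```python
import torch
from monai.metrics import compute_roc_auc

y_pred = torch.tensor([0.1, 0.4, 0.35, 0.8])  # predicted scores
y = torch.tensor([0, 0, 1, 1])                # binary ground truth
print(compute_roc_auc(y_pred, y))             # 0.75 for this toy example
```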
monai/networks/blocks/__init__.py
CHANGED
@@ -17,6 +17,7 @@ from .aspp import SimpleASPP
 from .backbone_fpn_utils import BackboneWithFPN
 from .convolutions import Convolution, ResidualUnit
 from .crf import CRF
+from .crossattention import CrossAttentionBlock
 from .denseblock import ConvDenseBlock, DenseBlock
 from .dints_block import ActiConvNormBlock, FactorizedIncreaseBlock, FactorizedReduceBlock, P3DActiConvNormBlock
 from .downsample import MaxAvgPool
@@ -30,6 +31,8 @@ from .patchembedding import PatchEmbed, PatchEmbeddingBlock
 from .regunet_block import RegistrationDownSampleBlock, RegistrationExtractionBlock, RegistrationResidualConvBlock
 from .segresnet_block import ResBlock
 from .selfattention import SABlock
+from .spade_norm import SPADE
+from .spatialattention import SpatialAttentionBlock
 from .squeeze_and_excitation import (
     ChannelSELayer,
     ResidualSELayer,
monai/networks/blocks/attention_utils.py
ADDED
@@ -0,0 +1,128 @@
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Tuple
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+
+def get_rel_pos(q_size: int, k_size: int, rel_pos: torch.Tensor) -> torch.Tensor:
+    """
+    Get relative positional embeddings according to the relative positions of
+    query and key sizes.
+
+    Args:
+        q_size (int): size of query q.
+        k_size (int): size of key k.
+        rel_pos (Tensor): relative position embeddings (L, C).
+
+    Returns:
+        Extracted positional embeddings according to relative positions.
+    """
+    rel_pos_resized: torch.Tensor = torch.Tensor()
+    max_rel_dist = int(2 * max(q_size, k_size) - 1)
+    # Interpolate rel pos if needed.
+    if rel_pos.shape[0] != max_rel_dist:
+        # Interpolate rel pos.
+        rel_pos_resized = F.interpolate(
+            rel_pos.reshape(1, rel_pos.shape[0], -1).permute(0, 2, 1), size=max_rel_dist, mode="linear"
+        )
+        rel_pos_resized = rel_pos_resized.reshape(-1, max_rel_dist).permute(1, 0)
+    else:
+        rel_pos_resized = rel_pos
+
+    # Scale the coords with short length if shapes for q and k are different.
+    q_coords = torch.arange(q_size)[:, None] * max(k_size / q_size, 1.0)
+    k_coords = torch.arange(k_size)[None, :] * max(q_size / k_size, 1.0)
+    relative_coords = (q_coords - k_coords) + (k_size - 1) * max(q_size / k_size, 1.0)
+
+    return rel_pos_resized[relative_coords.long()]
+
+
+def add_decomposed_rel_pos(
+    attn: torch.Tensor, q: torch.Tensor, rel_pos_lst: nn.ParameterList, q_size: Tuple, k_size: Tuple
+) -> torch.Tensor:
+    r"""
+    Calculate decomposed Relative Positional Embeddings from mvitv2 implementation:
+    https://github.com/facebookresearch/mvit/blob/19786631e330df9f3622e5402b4a419a263a2c80/mvit/models/attention.py
+
+    Only 2D and 3D are supported.
+
+    Encoding the relative position of tokens in the attention matrix: tokens spaced a distance
+    `d` apart will have the same embedding value (unlike absolute positional embedding).
+
+    .. math::
+        Attn_{logits}(Q, K) = (QK^{T} + E_{rel})*scale
+
+    where
+
+    .. math::
+        E_{ij}^{(rel)} = Q_{i}.R_{p(i), p(j)}
+
+    with :math:`R_{p(i), p(j)} \in R^{dim}` and :math:`p(i), p(j)`,
+    respectively spatial positions of element :math:`i` and :math:`j`
+
+    When using "decomposed" relative positional embedding, positional embedding is defined ("decomposed") as follow:
+
+    .. math::
+        R_{p(i), p(j)} = R^{d1}_{d1(i), d1(j)} + ... + R^{dn}_{dn(i), dn(j)}
+
+    with :math:`n = 1...dim`
+
+    Decomposed relative positional embedding reduces the complexity from :math:`\mathcal{O}(d1*...*dn)` to
+    :math:`\mathcal{O}(d1+...+dn)` compared with classical relative positional embedding.
+
+    Args:
+        attn (Tensor): attention map.
+        q (Tensor): query q in the attention layer with shape (B, s_dim_1 * ... * s_dim_n, C).
+        rel_pos_lst (ParameterList): relative position embeddings for each axis: rel_pos_lst[n] for nth axis.
+        q_size (Tuple): spatial sequence size of query q with (q_dim_1, ..., q_dim_n).
+        k_size (Tuple): spatial sequence size of key k with (k_dim_1, ..., k_dim_n).
+
+    Returns:
+        attn (Tensor): attention logits with added relative positional embeddings.
+    """
+    rh = get_rel_pos(q_size[0], k_size[0], rel_pos_lst[0])
+    rw = get_rel_pos(q_size[1], k_size[1], rel_pos_lst[1])
+
+    batch, _, dim = q.shape
+
+    if len(rel_pos_lst) == 2:
+        q_h, q_w = q_size[:2]
+        k_h, k_w = k_size[:2]
+        r_q = q.reshape(batch, q_h, q_w, dim)
+        rel_h = torch.einsum("bhwc,hkc->bhwk", r_q, rh)
+        rel_w = torch.einsum("bhwc,wkc->bhwk", r_q, rw)
+
+        attn = (attn.view(batch, q_h, q_w, k_h, k_w) + rel_h[:, :, :, :, None] + rel_w[:, :, :, None, :]).view(
+            batch, q_h * q_w, k_h * k_w
+        )
+    elif len(rel_pos_lst) == 3:
+        q_h, q_w, q_d = q_size[:3]
+        k_h, k_w, k_d = k_size[:3]
+
+        rd = get_rel_pos(q_d, k_d, rel_pos_lst[2])
+
+        r_q = q.reshape(batch, q_h, q_w, q_d, dim)
+        rel_h = torch.einsum("bhwdc,hkc->bhwdk", r_q, rh)
+        rel_w = torch.einsum("bhwdc,wkc->bhwdk", r_q, rw)
+        rel_d = torch.einsum("bhwdc,wkc->bhwdk", r_q, rd)
+
+        attn = (
+            attn.view(batch, q_h, q_w, q_d, k_h, k_w, k_d)
+            + rel_h[:, :, :, :, None, None]
+            + rel_w[:, :, :, None, :, None]
+            + rel_d[:, :, :, None, None, :]
+        ).view(batch, q_h * q_w * q_d, k_h * k_w * k_d)
+
+    return attn
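To illustrate what `add_decomposed_rel_pos` expects, a minimal 2D sketch with made-up sizes (4x4 tokens, per-head dimension 8); the `ParameterList` layout mirrors the `(2*dim-1, C)` shape documented above:

```python
import torch
from torch import nn
from monai.networks.blocks.attention_utils import add_decomposed_rel_pos

h = w = 4
head_dim = 8
q = torch.randn(1, h * w, head_dim)  # (B, HW, C) query tokens
attn = torch.randn(1, h * w, h * w)  # raw attention logits (B, HW, HW)
rel_pos = nn.ParameterList([nn.Parameter(torch.zeros(2 * s - 1, head_dim)) for s in (h, w)])
attn = add_decomposed_rel_pos(attn, q, rel_pos, (h, w), (h, w))
print(attn.shape)  # torch.Size([1, 16, 16])
```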
monai/networks/blocks/crossattention.py
ADDED
@@ -0,0 +1,168 @@
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Optional, Tuple
+
+import torch
+import torch.nn as nn
+
+from monai.networks.layers.utils import get_rel_pos_embedding_layer
+from monai.utils import optional_import
+
+Rearrange, _ = optional_import("einops.layers.torch", name="Rearrange")
+
+
+class CrossAttentionBlock(nn.Module):
+    """
+    A cross-attention block, based on: "Dosovitskiy et al.,
+    An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>"
+    One can setup relative positional embedding as described in <https://arxiv.org/abs/2112.01526>
+    """
+
+    def __init__(
+        self,
+        hidden_size: int,
+        num_heads: int,
+        dropout_rate: float = 0.0,
+        hidden_input_size: int | None = None,
+        context_input_size: int | None = None,
+        dim_head: int | None = None,
+        qkv_bias: bool = False,
+        save_attn: bool = False,
+        causal: bool = False,
+        sequence_length: int | None = None,
+        rel_pos_embedding: Optional[str] = None,
+        input_size: Optional[Tuple] = None,
+        attention_dtype: Optional[torch.dtype] = None,
+    ) -> None:
+        """
+        Args:
+            hidden_size (int): dimension of hidden layer.
+            num_heads (int): number of attention heads.
+            dropout_rate (float, optional): fraction of the input units to drop. Defaults to 0.0.
+            hidden_input_size (int, optional): dimension of the input tensor. Defaults to hidden_size.
+            context_input_size (int, optional): dimension of the context tensor. Defaults to hidden_size.
+            dim_head (int, optional): dimension of each head. Defaults to hidden_size // num_heads.
+            qkv_bias (bool, optional): bias term for the qkv linear layer. Defaults to False.
+            save_attn (bool, optional): to make accessible the attention matrix. Defaults to False.
+            causal: whether to use causal attention.
+            sequence_length: if causal is True, it is necessary to specify the sequence length.
+            rel_pos_embedding (str, optional): Add relative positional embeddings to the attention map.
+                For now only "decomposed" is supported (see https://arxiv.org/abs/2112.01526). 2D and 3D are supported.
+            input_size (tuple(spatial_dim), optional): Input resolution for calculating the relative
+                positional parameter size.
+            attention_dtype: cast attention operations to this dtype.
+        """
+
+        super().__init__()
+
+        if not (0 <= dropout_rate <= 1):
+            raise ValueError("dropout_rate should be between 0 and 1.")
+
+        if dim_head:
+            inner_size = num_heads * dim_head
+            self.head_dim = dim_head
+        else:
+            if hidden_size % num_heads != 0:
+                raise ValueError("hidden size should be divisible by num_heads.")
+            inner_size = hidden_size
+            self.head_dim = hidden_size // num_heads
+
+        if causal and sequence_length is None:
+            raise ValueError("sequence_length is necessary for causal attention.")
+
+        self.num_heads = num_heads
+        self.hidden_input_size = hidden_input_size if hidden_input_size else hidden_size
+        self.context_input_size = context_input_size if context_input_size else hidden_size
+        self.out_proj = nn.Linear(inner_size, self.hidden_input_size)
+        # key, query, value projections
+        self.to_q = nn.Linear(self.hidden_input_size, inner_size, bias=qkv_bias)
+        self.to_k = nn.Linear(self.context_input_size, inner_size, bias=qkv_bias)
+        self.to_v = nn.Linear(self.context_input_size, inner_size, bias=qkv_bias)
+        self.input_rearrange = Rearrange("b h (l d) -> b l h d", l=num_heads)
+
+        self.out_rearrange = Rearrange("b h l d -> b l (h d)")
+        self.drop_output = nn.Dropout(dropout_rate)
+        self.drop_weights = nn.Dropout(dropout_rate)
+
+        self.scale = self.head_dim**-0.5
+        self.save_attn = save_attn
+        self.attention_dtype = attention_dtype
+
+        self.causal = causal
+        self.sequence_length = sequence_length
+
+        if causal and sequence_length is not None:
+            # causal mask to ensure that attention is only applied to the left in the input sequence
+            self.register_buffer(
+                "causal_mask",
+                torch.tril(torch.ones(sequence_length, sequence_length)).view(1, 1, sequence_length, sequence_length),
+            )
+            self.causal_mask: torch.Tensor
+        else:
+            self.causal_mask = torch.Tensor()
+
+        self.att_mat = torch.Tensor()
+        self.rel_positional_embedding = (
+            get_rel_pos_embedding_layer(rel_pos_embedding, input_size, self.head_dim, self.num_heads)
+            if rel_pos_embedding is not None
+            else None
+        )
+        self.input_size = input_size
+
+    def forward(self, x: torch.Tensor, context: Optional[torch.Tensor] = None):
+        """
+        Args:
+            x (torch.Tensor): input tensor. B x (s_dim_1 * ... * s_dim_n) x C
+            context (torch.Tensor, optional): context tensor. B x (s_dim_1 * ... * s_dim_n) x C
+
+        Return:
+            torch.Tensor: B x (s_dim_1 * ... * s_dim_n) x C
+        """
+        # calculate query, key, values for all heads in batch and move head forward to be the batch dim
+        b, t, c = x.size()  # batch size, sequence length, embedding dimensionality (hidden_size)
+
+        q = self.to_q(x)
+        kv = context if context is not None else x
+        _, kv_t, _ = kv.size()
+        k = self.to_k(kv)
+        v = self.to_v(kv)
+
+        if self.attention_dtype is not None:
+            q = q.to(self.attention_dtype)
+            k = k.to(self.attention_dtype)
+
+        q = q.view(b, t, self.num_heads, c // self.num_heads).transpose(1, 2)  # (b, nh, t, hs)
+        k = k.view(b, kv_t, self.num_heads, c // self.num_heads).transpose(1, 2)  # (b, nh, kv_t, hs)
+        v = v.view(b, kv_t, self.num_heads, c // self.num_heads).transpose(1, 2)  # (b, nh, kv_t, hs)
+        att_mat = torch.einsum("blxd,blyd->blxy", q, k) * self.scale
+
+        # apply relative positional embedding if defined
+        att_mat = self.rel_positional_embedding(x, att_mat, q) if self.rel_positional_embedding is not None else att_mat
+
+        if self.causal:
+            att_mat = att_mat.masked_fill(self.causal_mask[:, :, :t, :kv_t] == 0, float("-inf"))
+
+        att_mat = att_mat.softmax(dim=-1)
+
+        if self.save_attn:
+            # no gradients and new tensor;
+            # https://pytorch.org/docs/stable/generated/torch.Tensor.detach.html
+            self.att_mat = att_mat.detach()
+
+        att_mat = self.drop_weights(att_mat)
+        x = torch.einsum("bhxy,bhyd->bhxd", att_mat, v)
+        x = self.out_rearrange(x)
+        x = self.out_proj(x)
+        x = self.drop_output(x)
+        return x
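A usage sketch of the new block with hypothetical sizes (image tokens attending to a smaller context sequence); note that `einops` must be installed for the internal `Rearrange` import:

```python
import torch
from monai.networks.blocks import CrossAttentionBlock

block = CrossAttentionBlock(hidden_size=64, num_heads=4, dropout_rate=0.1, context_input_size=32)
x = torch.randn(2, 16, 64)        # B x tokens x hidden_size
context = torch.randn(2, 10, 32)  # B x context tokens x context_input_size
out = block(x, context)
print(out.shape)  # torch.Size([2, 16, 64])
```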
monai/networks/blocks/rel_pos_embedding.py
ADDED
@@ -0,0 +1,56 @@
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Iterable, Tuple
+
+import torch
+from torch import nn
+
+from monai.networks.blocks.attention_utils import add_decomposed_rel_pos
+from monai.utils.misc import ensure_tuple_size
+
+
+class DecomposedRelativePosEmbedding(nn.Module):
+    def __init__(self, s_input_dims: Tuple[int, int] | Tuple[int, int, int], c_dim: int, num_heads: int) -> None:
+        """
+        Args:
+            s_input_dims (Tuple): input spatial dimension. (H, W) or (H, W, D)
+            c_dim (int): channel dimension
+            num_heads(int): number of attention heads
+        """
+        super().__init__()
+
+        # validate inputs
+        if not isinstance(s_input_dims, Iterable) or len(s_input_dims) not in [2, 3]:
+            raise ValueError("s_input_dims must be set as follows: (H, W) or (H, W, D)")
+
+        self.s_input_dims = s_input_dims
+        self.c_dim = c_dim
+        self.num_heads = num_heads
+        self.rel_pos_arr = nn.ParameterList(
+            [nn.Parameter(torch.zeros(2 * dim_input_size - 1, c_dim)) for dim_input_size in s_input_dims]
+        )
+
+    def forward(self, x: torch.Tensor, att_mat: torch.Tensor, q: torch.Tensor) -> torch.Tensor:
+        """"""
+        batch = x.shape[0]
+        h, w, d = ensure_tuple_size(self.s_input_dims, 3, 1)
+
+        att_mat = add_decomposed_rel_pos(
+            att_mat.contiguous().view(batch * self.num_heads, h * w * d, h * w * d),
+            q.contiguous().view(batch * self.num_heads, h * w * d, -1),
+            self.rel_pos_arr,
+            (h, w) if d == 1 else (h, w, d),
+            (h, w) if d == 1 else (h, w, d),
+        )
+
+        att_mat = att_mat.reshape(batch, self.num_heads, h * w * d, h * w * d)
+        return att_mat
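A shape-only sketch of how `DecomposedRelativePosEmbedding` is meant to be called (matching the `(x, att_mat, q)` signature above; sizes are hypothetical, with `c_dim` equal to the per-head dimension):

```python
import torch
from monai.networks.blocks.rel_pos_embedding import DecomposedRelativePosEmbedding

h = w = 4
heads, head_dim = 2, 8
rel_pos = DecomposedRelativePosEmbedding(s_input_dims=(h, w), c_dim=head_dim, num_heads=heads)

x = torch.randn(1, h * w, heads * head_dim)    # B x tokens x hidden
q = torch.randn(1, heads, h * w, head_dim)     # B x heads x tokens x head_dim
att_mat = torch.randn(1, heads, h * w, h * w)  # raw attention logits
print(rel_pos(x, att_mat, q).shape)            # torch.Size([1, 2, 16, 16])
```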
monai/networks/blocks/selfattention.py
CHANGED
@@ -11,9 +11,12 @@
 
 from __future__ import annotations
 
+from typing import Optional, Tuple
+
 import torch
 import torch.nn as nn
 
+from monai.networks.layers.utils import get_rel_pos_embedding_layer
 from monai.utils import optional_import
 
 Rearrange, _ = optional_import("einops.layers.torch", name="Rearrange")
@@ -33,6 +36,12 @@ class SABlock(nn.Module):
         qkv_bias: bool = False,
         save_attn: bool = False,
         dim_head: int | None = None,
+        hidden_input_size: int | None = None,
+        causal: bool = False,
+        sequence_length: int | None = None,
+        rel_pos_embedding: Optional[str] = None,
+        input_size: Optional[Tuple] = None,
+        attention_dtype: Optional[torch.dtype] = None,
     ) -> None:
         """
         Args:
@@ -42,6 +51,14 @@ class SABlock(nn.Module):
             qkv_bias (bool, optional): bias term for the qkv linear layer. Defaults to False.
             save_attn (bool, optional): to make accessible the attention matrix. Defaults to False.
             dim_head (int, optional): dimension of each head. Defaults to hidden_size // num_heads.
+            hidden_input_size (int, optional): dimension of the input tensor. Defaults to hidden_size.
+            causal: whether to use causal attention (see https://arxiv.org/abs/1706.03762).
+            sequence_length: if causal is True, it is necessary to specify the sequence length.
+            rel_pos_embedding (str, optional): Add relative positional embeddings to the attention map.
+                For now only "decomposed" is supported (see https://arxiv.org/abs/2112.01526). 2D and 3D are supported.
+            input_size (tuple(spatial_dim), optional): Input resolution for calculating the relative
+                positional parameter size.
+            attention_dtype: cast attention operations to this dtype.
 
         """
 
@@ -53,12 +70,23 @@ class SABlock(nn.Module):
         if hidden_size % num_heads != 0:
             raise ValueError("hidden size should be divisible by num_heads.")
 
+        if dim_head:
+            self.inner_dim = num_heads * dim_head
+            self.dim_head = dim_head
+        else:
+            if hidden_size % num_heads != 0:
+                raise ValueError("hidden size should be divisible by num_heads.")
+            self.inner_dim = hidden_size
+            self.dim_head = hidden_size // num_heads
+
+        if causal and sequence_length is None:
+            raise ValueError("sequence_length is necessary for causal attention.")
+
         self.num_heads = num_heads
-        self.
-        self.
+        self.hidden_input_size = hidden_input_size if hidden_input_size else hidden_size
+        self.out_proj = nn.Linear(self.inner_dim, self.hidden_input_size)
 
-        self.
-        self.qkv = nn.Linear(hidden_size, self.inner_dim * 3, bias=qkv_bias)
+        self.qkv = nn.Linear(self.hidden_input_size, self.inner_dim * 3, bias=qkv_bias)
         self.input_rearrange = Rearrange("b h (qkv l d) -> qkv b l h d", qkv=3, l=num_heads)
         self.out_rearrange = Rearrange("b h l d -> b l (h d)")
         self.drop_output = nn.Dropout(dropout_rate)
@@ -66,11 +94,52 @@ class SABlock(nn.Module):
         self.scale = self.dim_head**-0.5
         self.save_attn = save_attn
         self.att_mat = torch.Tensor()
+        self.attention_dtype = attention_dtype
+        self.causal = causal
+        self.sequence_length = sequence_length
+
+        if causal and sequence_length is not None:
+            # causal mask to ensure that attention is only applied to the left in the input sequence
+            self.register_buffer(
+                "causal_mask",
+                torch.tril(torch.ones(sequence_length, sequence_length)).view(1, 1, sequence_length, sequence_length),
+            )
+            self.causal_mask: torch.Tensor
+        else:
+            self.causal_mask = torch.Tensor()
+
+        self.rel_positional_embedding = (
+            get_rel_pos_embedding_layer(rel_pos_embedding, input_size, self.dim_head, self.num_heads)
+            if rel_pos_embedding is not None
+            else None
+        )
+        self.input_size = input_size
 
     def forward(self, x):
+        """
+        Args:
+            x (torch.Tensor): input tensor. B x (s_dim_1 * ... * s_dim_n) x C
+
+        Return:
+            torch.Tensor: B x (s_dim_1 * ... * s_dim_n) x C
+        """
         output = self.input_rearrange(self.qkv(x))
         q, k, v = output[0], output[1], output[2]
 
+        if self.attention_dtype is not None:
+            q = q.to(self.attention_dtype)
+            k = k.to(self.attention_dtype)
+
+        att_mat = torch.einsum("blxd,blyd->blxy", q, k) * self.scale
+
+        # apply relative positional embedding if defined
+        att_mat = self.rel_positional_embedding(x, att_mat, q) if self.rel_positional_embedding is not None else att_mat
+
+        if self.causal:
+            att_mat = att_mat.masked_fill(self.causal_mask[:, :, : x.shape[1], : x.shape[1]] == 0, float("-inf"))
+
+        att_mat = att_mat.softmax(dim=-1)
+
         if self.save_attn:
             # no gradients and new tensor;
             # https://pytorch.org/docs/stable/generated/torch.Tensor.detach.html
monai/networks/blocks/spade_norm.py
ADDED
@@ -0,0 +1,95 @@
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from monai.networks.blocks import Convolution
+from monai.networks.layers.utils import get_norm_layer
+
+
+class SPADE(nn.Module):
+    """
+    Spatially Adaptive Normalization (SPADE) block, allowing for normalization of activations conditioned on a
+    semantic map. This block is used in SPADE-based image-to-image translation models, as described in
+    Semantic Image Synthesis with Spatially-Adaptive Normalization (https://arxiv.org/abs/1903.07291).
+
+    Args:
+        label_nc: number of semantic labels
+        norm_nc: number of output channels
+        kernel_size: kernel size
+        spatial_dims: number of spatial dimensions
+        hidden_channels: number of channels in the intermediate gamma and beta layers
+        norm: type of base normalisation used before applying the SPADE normalisation
+        norm_params: parameters for the base normalisation
+    """
+
+    def __init__(
+        self,
+        label_nc: int,
+        norm_nc: int,
+        kernel_size: int = 3,
+        spatial_dims: int = 2,
+        hidden_channels: int = 64,
+        norm: str | tuple = "INSTANCE",
+        norm_params: dict | None = None,
+    ) -> None:
+        super().__init__()
+
+        if norm_params is None:
+            norm_params = {}
+        if len(norm_params) != 0:
+            norm = (norm, norm_params)
+        self.param_free_norm = get_norm_layer(norm, spatial_dims=spatial_dims, channels=norm_nc)
+        self.mlp_shared = Convolution(
+            spatial_dims=spatial_dims,
+            in_channels=label_nc,
+            out_channels=hidden_channels,
+            kernel_size=kernel_size,
+            norm=None,
+            act="LEAKYRELU",
+        )
+        self.mlp_gamma = Convolution(
+            spatial_dims=spatial_dims,
+            in_channels=hidden_channels,
+            out_channels=norm_nc,
+            kernel_size=kernel_size,
+            act=None,
+        )
+        self.mlp_beta = Convolution(
+            spatial_dims=spatial_dims,
+            in_channels=hidden_channels,
+            out_channels=norm_nc,
+            kernel_size=kernel_size,
+            act=None,
+        )
+
+    def forward(self, x: torch.Tensor, segmap: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            x: input tensor with shape (B, C, [spatial-dimensions]) where C is the number of semantic channels.
+            segmap: input segmentation map (B, C, [spatial-dimensions]) where C is the number of semantic channels.
+                The map will be interpolated to the dimension of x internally.
+        """
+
+        # Part 1. generate parameter-free normalized activations
+        normalized = self.param_free_norm(x.contiguous())
+
+        # Part 2. produce scaling and bias conditioned on semantic map
+        segmap = F.interpolate(segmap, size=x.size()[2:], mode="nearest")
+        actv = self.mlp_shared(segmap)
+        gamma = self.mlp_gamma(actv)
+        beta = self.mlp_beta(actv)
+        out: torch.Tensor = normalized * (1 + gamma) + beta
+        return out
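A minimal forward pass through the new SPADE block with made-up shapes (in practice `segmap` would be a one-hot semantic map with `label_nc` channels):

```python
import torch
from monai.networks.blocks import SPADE

spade = SPADE(label_nc=3, norm_nc=8, spatial_dims=2)
feat = torch.randn(2, 8, 32, 32)    # activations to be normalised
segmap = torch.randn(2, 3, 64, 64)  # semantic map; resized to feat's spatial size internally
print(spade(feat, segmap).shape)    # torch.Size([2, 8, 32, 32])
```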