PyPI - monai-weekly - Versions diffs - 1.5.dev2446__py3-none-any.whl → 1.5.dev2448__py3-none-any.whl - Mend

monai-weekly 1.5.dev2446__py3-none-any.whl → 1.5.dev2448__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

monai/__init__.py +1 -1
monai/_version.py +3 -3
monai/bundle/__init__.py +1 -1
monai/bundle/reference_resolver.py +10 -0
monai/bundle/workflows.py +187 -22
monai/networks/blocks/selfattention.py +18 -4
monai/networks/blocks/transformerblock.py +4 -2
monai/networks/nets/__init__.py +1 -0
monai/networks/nets/masked_autoencoder_vit.py +211 -0
monai/networks/nets/swin_unetr.py +24 -12
monai/transforms/__init__.py +9 -0
monai/transforms/utility/array.py +103 -6
monai/transforms/utility/dictionary.py +67 -0
monai/utils/module.py +3 -3
{monai_weekly-1.5.dev2446.dist-info → monai_weekly-1.5.dev2448.dist-info}/METADATA +71 -68
{monai_weekly-1.5.dev2446.dist-info → monai_weekly-1.5.dev2448.dist-info}/RECORD +19 -18
{monai_weekly-1.5.dev2446.dist-info → monai_weekly-1.5.dev2448.dist-info}/WHEEL +1 -1
{monai_weekly-1.5.dev2446.dist-info → monai_weekly-1.5.dev2448.dist-info}/LICENSE +0 -0
{monai_weekly-1.5.dev2446.dist-info → monai_weekly-1.5.dev2448.dist-info}/top_level.txt +0 -0

monai/__init__.py CHANGED Viewed

@@ -136,4 +136,4 @@ except BaseException:
     if MONAIEnvVars.debug():
         raise
-__commit_id__ = "13b96aedc48ad2da16149490b06a1a6bd8361335"
+__commit_id__ = "44e249d7d492d858199acfca1c948faa5aa33763"

monai/_version.py CHANGED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2024-11-17T02:30:32+0000",
+ "date": "2024-12-01T02:35:43+0000",
  "dirty": false,
  "error": null,
- "full-revisionid": "218216250ce297265400abe56ee915898d75a2ec",
- "version": "1.5.dev2446"
+ "full-revisionid": "d4ff1455cf46b35e4dcfb6f57d54b0738b39f738",
+ "version": "1.5.dev2448"
 }
 '''  # END VERSION_JSON

monai/bundle/__init__.py CHANGED Viewed

@@ -43,4 +43,4 @@ from .utils import (
     MACRO_KEY,
     load_bundle_config,
 )
-from .workflows import BundleWorkflow, ConfigWorkflow
+from .workflows import BundleWorkflow, ConfigWorkflow, PythonicWorkflow

monai/bundle/reference_resolver.py CHANGED Viewed

@@ -192,6 +192,16 @@ class ReferenceResolver:
         """
         return self._resolve_one_item(id=id, **kwargs)
+    def remove_resolved_content(self, id: str) -> Any | None:
+        """
+        Remove the resolved ``ConfigItem`` by id.
+        Args:
+            id: id name of the expected item.
+        """
+        return self.resolved_content.pop(id) if id in self.resolved_content else None
     @classmethod
     def normalize_id(cls, id: str | int) -> str:
         """

monai/bundle/workflows.py CHANGED Viewed

@@ -44,12 +44,18 @@ class BundleWorkflow(ABC):
         workflow_type: specifies the workflow type: "train" or "training" for a training workflow,
             or "infer", "inference", "eval", "evaluation" for a inference workflow,
             other unsupported string will raise a ValueError.
-            default to `train` for train workflow.
+            default to `None` for only using meta properties.
         workflow: specifies the workflow type: "train" or "training" for a training workflow,
             or "infer", "inference", "eval", "evaluation" for a inference workflow,
             other unsupported string will raise a ValueError.
             default to `None` for common workflow.
-        properties_path: the path to the JSON file of properties.
+        properties_path: the path to the JSON file of properties. If `workflow_type` is specified, properties will be
+            loaded from the file based on the provided `workflow_type` and meta. If no `workflow_type` is specified,
+            properties will default to loading from "meta". If `properties_path` is None, default properties
+            will be sourced from "monai/bundle/properties.py" based on the workflow_type:
+            For a training workflow, properties load from `TrainProperties` and `MetaProperties`.
+            For a inference workflow, properties load from `InferProperties` and `MetaProperties`.
+            For workflow_type = None : only `MetaProperties` will be loaded.
         meta_file: filepath of the metadata file, if this is a list of file paths, their contents will be merged in order.
         logging_file: config file for `logging` module in the program. for more details:
             https://docs.python.org/3/library/logging.config.html#logging.config.fileConfig.
@@ -97,29 +103,50 @@ class BundleWorkflow(ABC):
                         meta_file = None
         workflow_type = workflow if workflow is not None else workflow_type
-        if workflow_type is None and properties_path is None:
-            self.properties = copy(MetaProperties)
-            self.workflow_type = None
-            self.meta_file = meta_file
-            return
+        if workflow_type is not None:
+            if workflow_type.lower() in self.supported_train_type:
+                workflow_type = "train"
+            elif workflow_type.lower() in self.supported_infer_type:
+                workflow_type = "infer"
+            else:
+                raise ValueError(f"Unsupported workflow type: '{workflow_type}'.")
         if properties_path is not None:
             properties_path = Path(properties_path)
             if not properties_path.is_file():
                 raise ValueError(f"Property file {properties_path} does not exist.")
             with open(properties_path) as json_file:
-                self.properties = json.load(json_file)
-            self.workflow_type = None
-            self.meta_file = meta_file
-            return
-        if workflow_type.lower() in self.supported_train_type:  # type: ignore[union-attr]
-            self.properties = {**TrainProperties, **MetaProperties}
-            self.workflow_type = "train"
-        elif workflow_type.lower() in self.supported_infer_type:  # type: ignore[union-attr]
-            self.properties = {**InferProperties, **MetaProperties}
-            self.workflow_type = "infer"
+                try:
+                    properties = json.load(json_file)
+                    self.properties: dict = {}
+                    if workflow_type is not None and workflow_type in properties:
+                        self.properties = properties[workflow_type]
+                        if "meta" in properties:
+                            self.properties.update(properties["meta"])
+                    elif workflow_type is None:
+                        if "meta" in properties:
+                            self.properties = properties["meta"]
+                            logger.info(
+                                "No workflow type specified, default to load meta properties from property file."
+                            )
+                        else:
+                            logger.warning("No 'meta' key found in properties while workflow_type is None.")
+                except KeyError as e:
+                    raise ValueError(f"{workflow_type} not found in property file {properties_path}") from e
+                except json.JSONDecodeError as e:
+                    raise ValueError(f"Error decoding JSON from property file {properties_path}") from e
         else:
-            raise ValueError(f"Unsupported workflow type: '{workflow_type}'.")
+            if workflow_type == "train":
+                self.properties = {**TrainProperties, **MetaProperties}
+            elif workflow_type == "infer":
+                self.properties = {**InferProperties, **MetaProperties}
+            elif workflow_type is None:
+                self.properties = copy(MetaProperties)
+                logger.info("No workflow type and property file specified, default to 'meta' properties.")
+            else:
+                raise ValueError(f"Unsupported workflow type: '{workflow_type}'.")
+        self.workflow_type = workflow_type
         self.meta_file = meta_file
     @abstractmethod
@@ -226,6 +253,124 @@ class BundleWorkflow(ABC):
         return [n for n, p in self.properties.items() if p.get(BundleProperty.REQUIRED, False) and not hasattr(self, n)]
+class PythonicWorkflow(BundleWorkflow):
+    """
+    Base class for the pythonic workflow specification in bundle, it can be a training, evaluation or inference workflow.
+    It defines the basic interfaces for the bundle workflow behavior: `initialize`, `finalize`, etc.
+    This also provides the interface to get / set public properties to interact with a bundle workflow through
+    defined `get_<property>` accessor methods or directly defining members of the object.
+    For how to set the properties, users can define the `_set_<property>` methods or directly set the members of the object.
+    The `initialize` method is called to set up the workflow before running. This method sets up internal state
+    and prepares properties. If properties are modified after the workflow has been initialized, `self._is_initialized`
+    is set to `False`. Before running the workflow again, `initialize` should be called to ensure that the workflow is
+    properly set up with the new property values.
+    Args:
+        workflow_type: specifies the workflow type: "train" or "training" for a training workflow,
+            or "infer", "inference", "eval", "evaluation" for a inference workflow,
+            other unsupported string will raise a ValueError.
+            default to `None` for only using meta properties.
+        workflow: specifies the workflow type: "train" or "training" for a training workflow,
+            or "infer", "inference", "eval", "evaluation" for a inference workflow,
+            other unsupported string will raise a ValueError.
+            default to `None` for common workflow.
+        properties_path: the path to the JSON file of properties. If `workflow_type` is specified, properties will be
+            loaded from the file based on the provided `workflow_type` and meta. If no `workflow_type` is specified,
+            properties will default to loading from "meta". If `properties_path` is None, default properties
+            will be sourced from "monai/bundle/properties.py" based on the workflow_type:
+            For a training workflow, properties load from `TrainProperties` and `MetaProperties`.
+            For a inference workflow, properties load from `InferProperties` and `MetaProperties`.
+            For workflow_type = None : only `MetaProperties` will be loaded.
+        config_file: path to the config file, typically used to store hyperparameters.
+        meta_file: filepath of the metadata file, if this is a list of file paths, their contents will be merged in order.
+        logging_file: config file for `logging` module in the program. for more details:
+            https://docs.python.org/3/library/logging.config.html#logging.config.fileConfig.
+    """
+    supported_train_type: tuple = ("train", "training")
+    supported_infer_type: tuple = ("infer", "inference", "eval", "evaluation")
+    def __init__(
+        self,
+        workflow_type: str | None = None,
+        properties_path: PathLike | None = None,
+        config_file: str | Sequence[str] | None = None,
+        meta_file: str | Sequence[str] | None = None,
+        logging_file: str | None = None,
+        **override: Any,
+    ):
+        meta_file = str(Path(os.getcwd()) / "metadata.json") if meta_file is None else meta_file
+        super().__init__(
+            workflow_type=workflow_type, properties_path=properties_path, meta_file=meta_file, logging_file=logging_file
+        )
+        self._props_vals: dict = {}
+        self._set_props_vals: dict = {}
+        self.parser = ConfigParser()
+        if config_file is not None:
+            self.parser.read_config(f=config_file)
+        if self.meta_file is not None:
+            self.parser.read_meta(f=self.meta_file)
+        # the rest key-values in the _args are to override config content
+        self.parser.update(pairs=override)
+        self._is_initialized: bool = False
+    def initialize(self, *args: Any, **kwargs: Any) -> Any:
+        """
+        Initialize the bundle workflow before running.
+        """
+        self._props_vals = {}
+        self._is_initialized = True
+    def _get_property(self, name: str, property: dict) -> Any:
+        """
+        With specified property name and information, get the expected property value.
+        If the property is already generated, return from the bucket directly.
+        If user explicitly set the property, return it directly.
+        Otherwise, generate the expected property as a class private property with prefix "_".
+        Args:
+            name: the name of target property.
+            property: other information for the target property, defined in `TrainProperties` or `InferProperties`.
+        """
+        if not self._is_initialized:
+            raise RuntimeError("Please execute 'initialize' before getting any properties.")
+        value = None
+        if name in self._set_props_vals:
+            value = self._set_props_vals[name]
+        elif name in self._props_vals:
+            value = self._props_vals[name]
+        elif name in self.parser.config[self.parser.meta_key]:  # type: ignore[index]
+            id = self.properties.get(name, None).get(BundlePropertyConfig.ID, None)
+            value = self.parser[id]
+        else:
+            try:
+                value = getattr(self, f"get_{name}")()
+            except AttributeError as e:
+                if property[BundleProperty.REQUIRED]:
+                    raise ValueError(
+                        f"unsupported property '{name}' is required in the bundle properties,"
+                        f"need to implement a method 'get_{name}' to provide the property."
+                    ) from e
+            self._props_vals[name] = value
+        return value
+    def _set_property(self, name: str, property: dict, value: Any) -> Any:
+        """
+        With specified property name and information, set value for the expected property.
+        Stores user-reset initialized objects that should not be re-initialized and marks the workflow as not initialized.
+        Args:
+            name: the name of target property.
+            property: other information for the target property, defined in `TrainProperties` or `InferProperties`.
+            value: value to set for the property.
+        """
+        self._set_props_vals[name] = value
+        self._is_initialized = False
 class ConfigWorkflow(BundleWorkflow):
     """
     Specification for the config-based bundle workflow.
@@ -262,7 +407,13 @@ class ConfigWorkflow(BundleWorkflow):
             or "infer", "inference", "eval", "evaluation" for a inference workflow,
             other unsupported string will raise a ValueError.
             default to `None` for common workflow.
-        properties_path: the path to the JSON file of properties.
+        properties_path: the path to the JSON file of properties. If `workflow_type` is specified, properties will be
+            loaded from the file based on the provided `workflow_type` and meta. If no `workflow_type` is specified,
+            properties will default to loading from "train". If `properties_path` is None, default properties
+            will be sourced from "monai/bundle/properties.py" based on the workflow_type:
+            For a training workflow, properties load from `TrainProperties` and `MetaProperties`.
+            For a inference workflow, properties load from `InferProperties` and `MetaProperties`.
+            For workflow_type = None : only `MetaProperties` will be loaded.
         override: id-value pairs to override or add the corresponding config content.
             e.g. ``--net#input_chns 42``, ``--net %/data/other.json#net_arg``
@@ -324,7 +475,6 @@ class ConfigWorkflow(BundleWorkflow):
         self.parser.read_config(f=config_file)
         if self.meta_file is not None:
             self.parser.read_meta(f=self.meta_file)
         # the rest key-values in the _args are to override config content
         self.parser.update(pairs=override)
         self.init_id = init_id
@@ -394,8 +544,23 @@ class ConfigWorkflow(BundleWorkflow):
             ret.extend(wrong_props)
         return ret
-    def _run_expr(self, id: str, **kwargs: dict) -> Any:
-        return self.parser.get_parsed_content(id, **kwargs) if id in self.parser else None
+    def _run_expr(self, id: str, **kwargs: dict) -> list[Any]:
+        """
+        Evaluate the expression or expression list given by `id`. The resolved values from the evaluations are not stored,
+        allowing this to be evaluated repeatedly (eg. in streaming applications) without restarting the hosting process.
+        """
+        ret = []
+        if id in self.parser:
+            # suppose all the expressions are in a list, run and reset the expressions
+            if isinstance(self.parser[id], list):
+                for i in range(len(self.parser[id])):
+                    sub_id = f"{id}{ID_SEP_KEY}{i}"
+                    ret.append(self.parser.get_parsed_content(sub_id, **kwargs))
+                    self.parser.ref_resolver.remove_resolved_content(sub_id)
+            else:
+                ret.append(self.parser.get_parsed_content(id, **kwargs))
+                self.parser.ref_resolver.remove_resolved_content(id)
+        return ret
     def _get_prop_id(self, name: str, property: dict) -> Any:
         prop_id = property[BundlePropertyConfig.ID]

monai/networks/blocks/selfattention.py CHANGED Viewed

@@ -11,7 +11,7 @@
 from __future__ import annotations
-from typing import Tuple, Union
+from typing import Optional, Tuple, Union
 import torch
 import torch.nn as nn
@@ -154,10 +154,12 @@ class SABlock(nn.Module):
         )
         self.input_size = input_size
-    def forward(self, x):
+    def forward(self, x, attn_mask: Optional[torch.Tensor] = None):
         """
         Args:
             x (torch.Tensor): input tensor. B x (s_dim_1 * ... * s_dim_n) x C
+            attn_mask (torch.Tensor, optional): mask to apply to the attention matrix.
+            B x (s_dim_1 * ... * s_dim_n). Defaults to None.
         Return:
             torch.Tensor: B x (s_dim_1 * ... * s_dim_n) x C
@@ -176,7 +178,13 @@ class SABlock(nn.Module):
         if self.use_flash_attention:
             x = F.scaled_dot_product_attention(
-                query=q, key=k, value=v, scale=self.scale, dropout_p=self.dropout_rate, is_causal=self.causal
+                query=q,
+                key=k,
+                value=v,
+                attn_mask=attn_mask,
+                scale=self.scale,
+                dropout_p=self.dropout_rate,
+                is_causal=self.causal,
             )
         else:
             att_mat = torch.einsum("blxd,blyd->blxy", q, k) * self.scale
@@ -186,10 +194,16 @@ class SABlock(nn.Module):
                 att_mat = self.rel_positional_embedding(x, att_mat, q)
             if self.causal:
+                if attn_mask is not None:
+                    raise ValueError("Causal attention does not support attention masks.")
                 att_mat = att_mat.masked_fill(self.causal_mask[:, :, : x.shape[-2], : x.shape[-2]] == 0, float("-inf"))
-            att_mat = att_mat.softmax(dim=-1)
+            if attn_mask is not None:
+                attn_mask = attn_mask.unsqueeze(1).unsqueeze(2)
+                attn_mask = attn_mask.expand(-1, self.num_heads, -1, -1)
+                att_mat = att_mat.masked_fill(attn_mask == 0, float("-inf"))
+            att_mat = att_mat.softmax(dim=-1)
             if self.save_attn:
                 # no gradients and new tensor;
                 # https://pytorch.org/docs/stable/generated/torch.Tensor.detach.html

monai/networks/blocks/transformerblock.py CHANGED Viewed

@@ -90,8 +90,10 @@ class TransformerBlock(nn.Module):
             use_flash_attention=use_flash_attention,
         )
-    def forward(self, x: torch.Tensor, context: Optional[torch.Tensor] = None) -> torch.Tensor:
-        x = x + self.attn(self.norm1(x))
+    def forward(
+        self, x: torch.Tensor, context: Optional[torch.Tensor] = None, attn_mask: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
+        x = x + self.attn(self.norm1(x), attn_mask=attn_mask)
         if self.with_cross_attention:
             x = x + self.cross_attn(self.norm_cross_attn(x), context=context)
         x = x + self.mlp(self.norm2(x))

monai/networks/nets/__init__.py CHANGED Viewed

@@ -53,6 +53,7 @@ from .fullyconnectednet import FullyConnectedNet, VarFullyConnectedNet
 from .generator import Generator
 from .highresnet import HighResBlock, HighResNet
 from .hovernet import Hovernet, HoVernet, HoVerNet, HoverNet
+from .masked_autoencoder_vit import MaskedAutoEncoderViT
 from .mednext import (
     MedNeXt,
     MedNext,

monai/networks/nets/masked_autoencoder_vit.py ADDED Viewed

@@ -0,0 +1,211 @@
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+from collections.abc import Sequence
+import numpy as np
+import torch
+import torch.nn as nn
+from monai.networks.blocks.patchembedding import PatchEmbeddingBlock
+from monai.networks.blocks.pos_embed_utils import build_sincos_position_embedding
+from monai.networks.blocks.transformerblock import TransformerBlock
+from monai.networks.layers import trunc_normal_
+from monai.utils import ensure_tuple_rep
+from monai.utils.module import look_up_option
+SUPPORTED_POS_EMBEDDING_TYPES = {"none", "learnable", "sincos"}
+__all__ = ["MaskedAutoEncoderViT"]
+class MaskedAutoEncoderViT(nn.Module):
+    """
+    Masked Autoencoder (ViT), based on: "Kaiming et al.,
+    Masked Autoencoders Are Scalable Vision Learners <https://arxiv.org/abs/2111.06377>"
+    Only a subset of the patches passes through the encoder. The decoder tries to reconstruct
+    the masked patches, resulting in improved training speed.
+    """
+    def __init__(
+        self,
+        in_channels: int,
+        img_size: Sequence[int] | int,
+        patch_size: Sequence[int] | int,
+        hidden_size: int = 768,
+        mlp_dim: int = 512,
+        num_layers: int = 12,
+        num_heads: int = 12,
+        masking_ratio: float = 0.75,
+        decoder_hidden_size: int = 384,
+        decoder_mlp_dim: int = 512,
+        decoder_num_layers: int = 4,
+        decoder_num_heads: int = 12,
+        proj_type: str = "conv",
+        pos_embed_type: str = "sincos",
+        decoder_pos_embed_type: str = "sincos",
+        dropout_rate: float = 0.0,
+        spatial_dims: int = 3,
+        qkv_bias: bool = False,
+        save_attn: bool = False,
+    ) -> None:
+        """
+        Args:
+            in_channels: dimension of input channels or the number of channels for input.
+            img_size: dimension of input image.
+            patch_size: dimension of patch size
+            hidden_size: dimension of hidden layer. Defaults to 768.
+            mlp_dim: dimension of feedforward layer. Defaults to 512.
+            num_layers:  number of transformer blocks. Defaults to 12.
+            num_heads: number of attention heads. Defaults to 12.
+            masking_ratio: ratio of patches to be masked. Defaults to 0.75.
+            decoder_hidden_size: dimension of hidden layer for decoder. Defaults to 384.
+            decoder_mlp_dim: dimension of feedforward layer for decoder. Defaults to 512.
+            decoder_num_layers: number of transformer blocks for decoder. Defaults to 4.
+            decoder_num_heads: number of attention heads for decoder. Defaults to 12.
+            proj_type: position embedding layer type. Defaults to "conv".
+            pos_embed_type: position embedding layer type. Defaults to "sincos".
+            decoder_pos_embed_type: position embedding layer type for decoder. Defaults to "sincos".
+            dropout_rate: fraction of the input units to drop. Defaults to 0.0.
+            spatial_dims: number of spatial dimensions. Defaults to 3.
+            qkv_bias: apply bias to the qkv linear layer in self attention block. Defaults to False.
+            save_attn: to make accessible the attention in self attention block. Defaults to False.
+        Examples::
+            # for single channel input with image size of (96,96,96), and sin-cos positional encoding
+            >>> net = MaskedAutoEncoderViT(in_channels=1, img_size=(96,96,96), patch_size=(16,16,16),
+            pos_embed_type='sincos')
+            # for 3-channel with image size of (128,128,128) and a learnable positional encoding
+            >>> net = MaskedAutoEncoderViT(in_channels=3, img_size=128, patch_size=16, pos_embed_type='learnable')
+            # for 3-channel with image size of (224,224) and a masking ratio of 0.25
+            >>> net = MaskedAutoEncoderViT(in_channels=3, img_size=(224,224), patch_size=(16,16), masking_ratio=0.25,
+            spatial_dims=2)
+        """
+        super().__init__()
+        if not (0 <= dropout_rate <= 1):
+            raise ValueError(f"dropout_rate should be between 0 and 1, got {dropout_rate}.")
+        if hidden_size % num_heads != 0:
+            raise ValueError("hidden_size should be divisible by num_heads.")
+        if decoder_hidden_size % decoder_num_heads != 0:
+            raise ValueError("decoder_hidden_size should be divisible by decoder_num_heads.")
+        self.patch_size = ensure_tuple_rep(patch_size, spatial_dims)
+        self.img_size = ensure_tuple_rep(img_size, spatial_dims)
+        self.spatial_dims = spatial_dims
+        for m, p in zip(self.img_size, self.patch_size):
+            if m % p != 0:
+                raise ValueError(f"patch_size={patch_size} should be divisible by img_size={img_size}.")
+        self.decoder_hidden_size = decoder_hidden_size
+        if masking_ratio <= 0 or masking_ratio >= 1:
+            raise ValueError(f"masking_ratio should be in the range (0, 1), got {masking_ratio}.")
+        self.masking_ratio = masking_ratio
+        self.cls_token = nn.Parameter(torch.zeros(1, 1, hidden_size))
+        self.patch_embedding = PatchEmbeddingBlock(
+            in_channels=in_channels,
+            img_size=img_size,
+            patch_size=patch_size,
+            hidden_size=hidden_size,
+            num_heads=num_heads,
+            proj_type=proj_type,
+            pos_embed_type=pos_embed_type,
+            dropout_rate=dropout_rate,
+            spatial_dims=self.spatial_dims,
+        )
+        blocks = [
+            TransformerBlock(hidden_size, mlp_dim, num_heads, dropout_rate, qkv_bias, save_attn)
+            for _ in range(num_layers)
+        ]
+        self.blocks = nn.Sequential(*blocks, nn.LayerNorm(hidden_size))
+        # decoder
+        self.decoder_embed = nn.Linear(hidden_size, decoder_hidden_size)
+        self.mask_tokens = nn.Parameter(torch.zeros(1, 1, decoder_hidden_size))
+        self.decoder_pos_embed_type = look_up_option(decoder_pos_embed_type, SUPPORTED_POS_EMBEDDING_TYPES)
+        self.decoder_pos_embedding = nn.Parameter(torch.zeros(1, self.patch_embedding.n_patches, decoder_hidden_size))
+        decoder_blocks = [
+            TransformerBlock(decoder_hidden_size, decoder_mlp_dim, decoder_num_heads, dropout_rate, qkv_bias, save_attn)
+            for _ in range(decoder_num_layers)
+        ]
+        self.decoder_blocks = nn.Sequential(*decoder_blocks, nn.LayerNorm(decoder_hidden_size))
+        self.decoder_pred = nn.Linear(decoder_hidden_size, int(np.prod(self.patch_size)) * in_channels)
+        self._init_weights()
+    def _init_weights(self):
+        """
+        similar to monai/networks/blocks/patchembedding.py for the decoder positional encoding and for mask and
+        classification tokens
+        """
+        if self.decoder_pos_embed_type == "none":
+            pass
+        elif self.decoder_pos_embed_type == "learnable":
+            trunc_normal_(self.decoder_pos_embedding, mean=0.0, std=0.02, a=-2.0, b=2.0)
+        elif self.decoder_pos_embed_type == "sincos":
+            grid_size = []
+            for in_size, pa_size in zip(self.img_size, self.patch_size):
+                grid_size.append(in_size // pa_size)
+            self.decoder_pos_embedding = build_sincos_position_embedding(
+                grid_size, self.decoder_hidden_size, self.spatial_dims
+            )
+        else:
+            raise ValueError(f"decoder_pos_embed_type {self.decoder_pos_embed_type} not supported.")
+        # initialize patch_embedding like nn.Linear (instead of nn.Conv2d)
+        trunc_normal_(self.mask_tokens, mean=0.0, std=0.02, a=-2.0, b=2.0)
+        trunc_normal_(self.cls_token, mean=0.0, std=0.02, a=-2.0, b=2.0)
+    def _masking(self, x, masking_ratio: float | None = None):
+        batch_size, num_tokens, _ = x.shape
+        percentage_to_keep = 1 - masking_ratio if masking_ratio is not None else 1 - self.masking_ratio
+        selected_indices = torch.multinomial(
+            torch.ones(batch_size, num_tokens), int(percentage_to_keep * num_tokens), replacement=False
+        )
+        x_masked = x[torch.arange(batch_size).unsqueeze(1), selected_indices]  # gather the selected tokens
+        mask = torch.ones(batch_size, num_tokens, dtype=torch.int).to(x.device)
+        mask[torch.arange(batch_size).unsqueeze(-1), selected_indices] = 0
+        return x_masked, selected_indices, mask
+    def forward(self, x, masking_ratio: float | None = None):
+        x = self.patch_embedding(x)
+        x, selected_indices, mask = self._masking(x, masking_ratio=masking_ratio)
+        cls_tokens = self.cls_token.expand(x.shape[0], -1, -1)
+        x = torch.cat((cls_tokens, x), dim=1)
+        x = self.blocks(x)
+        # decoder
+        x = self.decoder_embed(x)
+        x_ = self.mask_tokens.repeat(x.shape[0], mask.shape[1], 1)
+        x_[torch.arange(x.shape[0]).unsqueeze(-1), selected_indices] = x[:, 1:, :]  # no cls token
+        x_ = x_ + self.decoder_pos_embedding
+        x = torch.cat([x[:, :1, :], x_], dim=1)
+        x = self.decoder_blocks(x)
+        x = self.decoder_pred(x)
+        x = x[:, 1:, :]
+        return x, mask

monai-weekly 1.5.dev2446__py3-none-any.whl → 1.5.dev2448__py3-none-any.whl

monai-weekly 1.5.dev2446__py3-none-any.whl → 1.5.dev2448__py3-none-any.whl