monai-weekly 1.4.dev2431__py3-none-any.whl → 1.4.dev2435__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monai/__init__.py +1 -1
- monai/_version.py +3 -3
- monai/apps/generation/maisi/networks/autoencoderkl_maisi.py +43 -25
- monai/apps/generation/maisi/networks/controlnet_maisi.py +15 -18
- monai/apps/generation/maisi/networks/diffusion_model_unet_maisi.py +18 -18
- monai/apps/vista3d/inferer.py +177 -0
- monai/apps/vista3d/sampler.py +179 -0
- monai/apps/vista3d/transforms.py +224 -0
- monai/bundle/scripts.py +29 -17
- monai/data/utils.py +1 -1
- monai/data/wsi_datasets.py +3 -3
- monai/inferers/utils.py +1 -0
- monai/losses/__init__.py +1 -0
- monai/losses/dice.py +10 -1
- monai/losses/nacl_loss.py +139 -0
- monai/networks/blocks/crossattention.py +48 -26
- monai/networks/blocks/mlp.py +16 -4
- monai/networks/blocks/selfattention.py +75 -23
- monai/networks/blocks/spatialattention.py +16 -1
- monai/networks/blocks/transformerblock.py +17 -2
- monai/networks/layers/filtering.py +6 -2
- monai/networks/nets/__init__.py +2 -1
- monai/networks/nets/autoencoderkl.py +55 -22
- monai/networks/nets/cell_sam_wrapper.py +92 -0
- monai/networks/nets/controlnet.py +24 -22
- monai/networks/nets/diffusion_model_unet.py +159 -19
- monai/networks/nets/segresnet_ds.py +127 -1
- monai/networks/nets/spade_autoencoderkl.py +22 -0
- monai/networks/nets/spade_diffusion_model_unet.py +39 -2
- monai/networks/nets/transformer.py +17 -17
- monai/networks/nets/vista3d.py +946 -0
- monai/networks/utils.py +4 -4
- monai/transforms/__init__.py +13 -2
- monai/transforms/io/array.py +59 -3
- monai/transforms/io/dictionary.py +29 -2
- monai/transforms/spatial/functional.py +1 -1
- monai/transforms/transform.py +2 -2
- monai/transforms/utility/dictionary.py +4 -0
- monai/transforms/utils.py +230 -1
- monai/{apps/generation/maisi/utils/morphological_ops.py → transforms/utils_morphological_ops.py} +2 -0
- monai/transforms/utils_pytorch_numpy_unification.py +2 -2
- monai/utils/enums.py +1 -0
- monai/utils/module.py +7 -6
- {monai_weekly-1.4.dev2431.dist-info → monai_weekly-1.4.dev2435.dist-info}/METADATA +84 -81
- {monai_weekly-1.4.dev2431.dist-info → monai_weekly-1.4.dev2435.dist-info}/RECORD +49 -43
- {monai_weekly-1.4.dev2431.dist-info → monai_weekly-1.4.dev2435.dist-info}/WHEEL +1 -1
- /monai/apps/{generation/maisi/utils → vista3d}/__init__.py +0 -0
- {monai_weekly-1.4.dev2431.dist-info → monai_weekly-1.4.dev2435.dist-info}/LICENSE +0 -0
- {monai_weekly-1.4.dev2431.dist-info → monai_weekly-1.4.dev2435.dist-info}/top_level.txt +0 -0
monai/__init__.py
CHANGED
monai/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
-    "date": "2024-
+    "date": "2024-09-01T02:28:54+0000",
     "dirty": false,
     "error": null,
-    "full-revisionid": "
-    "version": "1.4.
+    "full-revisionid": "d311b1d7b12a95dd7de995b507ffbb5ed413bab6",
+    "version": "1.4.dev2435"
 }
 ''' # END VERSION_JSON
 
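For orientation, the bumped metadata above is what the package reports at runtime; a quick check after installing the new wheel (standard MONAI attribute, shown as a sketch):

import monai

# expected to print "1.4.dev2435" once the new wheel is installed
print(monai.__version__)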
monai/apps/generation/maisi/networks/autoencoderkl_maisi.py
CHANGED
@@ -13,25 +13,17 @@ from __future__ import annotations
 
 import gc
 import logging
-from typing import
+from typing import Sequence
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
 from monai.networks.blocks import Convolution
-from monai.
+from monai.networks.blocks.spatialattention import SpatialAttentionBlock
+from monai.networks.nets.autoencoderkl import AEKLResBlock, AutoencoderKL
 from monai.utils.type_conversion import convert_to_tensor
 
-AttentionBlock, has_attentionblock = optional_import("generative.networks.nets.autoencoderkl", name="AttentionBlock")
-AutoencoderKL, has_autoencoderkl = optional_import("generative.networks.nets.autoencoderkl", name="AutoencoderKL")
-ResBlock, has_resblock = optional_import("generative.networks.nets.autoencoderkl", name="ResBlock")
-
-if TYPE_CHECKING:
-    from generative.networks.nets.autoencoderkl import AutoencoderKL as AutoencoderKLType
-else:
-    AutoencoderKLType = cast(type, AutoencoderKL)
-
 # Set up logging configuration
 logger = logging.getLogger(__name__)
 
@@ -518,11 +510,13 @@ class MaisiEncoder(nn.Module):
         in_channels: Number of input channels.
         num_channels: Sequence of block output channels.
         out_channels: Number of channels in the bottom layer (latent space) of the autoencoder.
-        num_res_blocks: Number of residual blocks (see
+        num_res_blocks: Number of residual blocks (see AEKLResBlock) per level.
         norm_num_groups: Number of groups for the group norm layers.
         norm_eps: Epsilon for the normalization.
         attention_levels: Indicate which level from num_channels contain an attention block.
         with_nonlocal_attn: If True, use non-local attention block.
+        include_fc: whether to include the final linear layer in the attention block. Default to False.
+        use_combined_linear: whether to use a single linear layer for qkv projection in the attention block, default to False.
         use_flash_attention: If True, use flash attention for a memory efficient attention mechanism.
         num_splits: Number of splits for the input tensor.
         dim_split: Dimension of splitting for the input tensor.
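The two new arguments documented above are forwarded verbatim to SpatialAttentionBlock. A minimal sketch of that block on its own, using only the keyword arguments visible in this diff (the channel and group sizes are illustrative):

import torch
from monai.networks.blocks.spatialattention import SpatialAttentionBlock

# illustrative sizes; norm_num_groups must divide num_channels
block = SpatialAttentionBlock(
    spatial_dims=3,
    num_channels=64,
    norm_num_groups=32,
    norm_eps=1e-6,
    include_fc=False,            # new option: skip the final linear projection
    use_combined_linear=False,   # new option: separate q/k/v projections instead of one fused linear
    use_flash_attention=False,
)
x = torch.rand(1, 64, 8, 8, 8)   # [B, C, H, W, D]
print(block(x).shape)            # the attention is residual, so the input shape is preserved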
@@ -547,6 +541,8 @@ class MaisiEncoder(nn.Module):
         print_info: bool = False,
         save_mem: bool = True,
         with_nonlocal_attn: bool = True,
+        include_fc: bool = False,
+        use_combined_linear: bool = False,
         use_flash_attention: bool = False,
     ) -> None:
         super().__init__()
@@ -603,11 +599,13 @@ class MaisiEncoder(nn.Module):
             input_channel = output_channel
             if attention_levels[i]:
                 blocks.append(
-
+                    SpatialAttentionBlock(
                         spatial_dims=spatial_dims,
                         num_channels=input_channel,
                         norm_num_groups=norm_num_groups,
                         norm_eps=norm_eps,
+                        include_fc=include_fc,
+                        use_combined_linear=use_combined_linear,
                         use_flash_attention=use_flash_attention,
                     )
                 )
@@ -626,7 +624,7 @@ class MaisiEncoder(nn.Module):
 
         if with_nonlocal_attn:
             blocks.append(
-
+                AEKLResBlock(
                     spatial_dims=spatial_dims,
                     in_channels=num_channels[-1],
                     norm_num_groups=norm_num_groups,
@@ -636,16 +634,18 @@ class MaisiEncoder(nn.Module):
             )
 
             blocks.append(
-
+                SpatialAttentionBlock(
                     spatial_dims=spatial_dims,
                     num_channels=num_channels[-1],
                     norm_num_groups=norm_num_groups,
                     norm_eps=norm_eps,
+                    include_fc=include_fc,
+                    use_combined_linear=use_combined_linear,
                     use_flash_attention=use_flash_attention,
                 )
             )
             blocks.append(
-
+                AEKLResBlock(
                     spatial_dims=spatial_dims,
                     in_channels=num_channels[-1],
                     norm_num_groups=norm_num_groups,
@@ -699,11 +699,13 @@ class MaisiDecoder(nn.Module):
         num_channels: Sequence of block output channels.
         in_channels: Number of channels in the bottom layer (latent space) of the autoencoder.
         out_channels: Number of output channels.
-        num_res_blocks: Number of residual blocks (see
+        num_res_blocks: Number of residual blocks (see AEKLResBlock) per level.
         norm_num_groups: Number of groups for the group norm layers.
         norm_eps: Epsilon for the normalization.
         attention_levels: Indicate which level from num_channels contain an attention block.
         with_nonlocal_attn: If True, use non-local attention block.
+        include_fc: whether to include the final linear layer in the attention block. Default to False.
+        use_combined_linear: whether to use a single linear layer for qkv projection in the attention block, default to False.
         use_flash_attention: If True, use flash attention for a memory efficient attention mechanism.
         use_convtranspose: If True, use ConvTranspose to upsample feature maps in decoder.
         num_splits: Number of splits for the input tensor.
@@ -729,6 +731,8 @@ class MaisiDecoder(nn.Module):
         print_info: bool = False,
         save_mem: bool = True,
         with_nonlocal_attn: bool = True,
+        include_fc: bool = False,
+        use_combined_linear: bool = False,
         use_flash_attention: bool = False,
         use_convtranspose: bool = False,
     ) -> None:
@@ -758,7 +762,7 @@ class MaisiDecoder(nn.Module):
 
         if with_nonlocal_attn:
             blocks.append(
-
+                AEKLResBlock(
                     spatial_dims=spatial_dims,
                     in_channels=reversed_block_out_channels[0],
                     norm_num_groups=norm_num_groups,
@@ -767,16 +771,18 @@ class MaisiDecoder(nn.Module):
                 )
             )
             blocks.append(
-
+                SpatialAttentionBlock(
                     spatial_dims=spatial_dims,
                     num_channels=reversed_block_out_channels[0],
                     norm_num_groups=norm_num_groups,
                     norm_eps=norm_eps,
+                    include_fc=include_fc,
+                    use_combined_linear=use_combined_linear,
                     use_flash_attention=use_flash_attention,
                 )
             )
             blocks.append(
-
+                AEKLResBlock(
                     spatial_dims=spatial_dims,
                     in_channels=reversed_block_out_channels[0],
                     norm_num_groups=norm_num_groups,
@@ -812,11 +818,13 @@ class MaisiDecoder(nn.Module):
 
             if reversed_attention_levels[i]:
                 blocks.append(
-
+                    SpatialAttentionBlock(
                         spatial_dims=spatial_dims,
                         num_channels=block_in_ch,
                         norm_num_groups=norm_num_groups,
                         norm_eps=norm_eps,
+                        include_fc=include_fc,
+                        use_combined_linear=use_combined_linear,
                         use_flash_attention=use_flash_attention,
                     )
                 )
@@ -870,7 +878,7 @@ class MaisiDecoder(nn.Module):
         return x
 
 
-class AutoencoderKlMaisi(
+class AutoencoderKlMaisi(AutoencoderKL):
     """
     AutoencoderKL with custom MaisiEncoder and MaisiDecoder.
 
@@ -886,6 +894,8 @@ class AutoencoderKlMaisi(AutoencoderKLType):
         norm_eps: Epsilon for the normalization.
         with_encoder_nonlocal_attn: If True, use non-local attention block in the encoder.
         with_decoder_nonlocal_attn: If True, use non-local attention block in the decoder.
+        include_fc: whether to include the final linear layer. Default to False.
+        use_combined_linear: whether to use a single linear layer for qkv projection, default to False.
         use_flash_attention: If True, use flash attention for a memory efficient attention mechanism.
         use_checkpointing: If True, use activation checkpointing.
         use_convtranspose: If True, use ConvTranspose to upsample feature maps in decoder.
@@ -909,6 +919,8 @@ class AutoencoderKlMaisi(AutoencoderKLType):
         norm_eps: float = 1e-6,
         with_encoder_nonlocal_attn: bool = False,
         with_decoder_nonlocal_attn: bool = False,
+        include_fc: bool = False,
+        use_combined_linear: bool = False,
         use_flash_attention: bool = False,
         use_checkpointing: bool = False,
         use_convtranspose: bool = False,
@@ -930,12 +942,14 @@ class AutoencoderKlMaisi(AutoencoderKLType):
             norm_eps,
             with_encoder_nonlocal_attn,
             with_decoder_nonlocal_attn,
-            use_flash_attention,
             use_checkpointing,
             use_convtranspose,
+            include_fc,
+            use_combined_linear,
+            use_flash_attention,
         )
 
-        self.encoder = MaisiEncoder(
+        self.encoder: nn.Module = MaisiEncoder(
             spatial_dims=spatial_dims,
             in_channels=in_channels,
             num_channels=num_channels,
@@ -945,6 +959,8 @@ class AutoencoderKlMaisi(AutoencoderKLType):
             norm_eps=norm_eps,
             attention_levels=attention_levels,
             with_nonlocal_attn=with_encoder_nonlocal_attn,
+            include_fc=include_fc,
+            use_combined_linear=use_combined_linear,
             use_flash_attention=use_flash_attention,
             num_splits=num_splits,
             dim_split=dim_split,
@@ -953,7 +969,7 @@ class AutoencoderKlMaisi(AutoencoderKLType):
             save_mem=save_mem,
         )
 
-        self.decoder = MaisiDecoder(
+        self.decoder: nn.Module = MaisiDecoder(
             spatial_dims=spatial_dims,
             num_channels=num_channels,
             in_channels=latent_channels,
@@ -963,6 +979,8 @@ class AutoencoderKlMaisi(AutoencoderKLType):
             norm_eps=norm_eps,
             attention_levels=attention_levels,
             with_nonlocal_attn=with_decoder_nonlocal_attn,
+            include_fc=include_fc,
+            use_combined_linear=use_combined_linear,
             use_flash_attention=use_flash_attention,
             use_convtranspose=use_convtranspose,
             num_splits=num_splits,
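The net effect of this file's changes is that the MAISI autoencoder now builds on monai core classes rather than the optional MONAI Generative ("generative") package. A small sketch of what that implies for imports, based only on the hunks above:

# these symbols now resolve inside monai itself; no "generative" install is needed for them
from monai.networks.blocks.spatialattention import SpatialAttentionBlock
from monai.networks.nets.autoencoderkl import AEKLResBlock, AutoencoderKL
from monai.apps.generation.maisi.networks.autoencoderkl_maisi import AutoencoderKlMaisi

# AutoencoderKlMaisi subclasses monai's AutoencoderKL directly (previously a generative-package alias)
assert issubclass(AutoencoderKlMaisi, AutoencoderKL)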
monai/apps/generation/maisi/networks/controlnet_maisi.py
CHANGED
@@ -11,24 +11,15 @@
 
 from __future__ import annotations
 
-from typing import
+from typing import Sequence
 
 import torch
 
-from monai.
+from monai.networks.nets.controlnet import ControlNet
+from monai.networks.nets.diffusion_model_unet import get_timestep_embedding
 
-ControlNet, has_controlnet = optional_import("generative.networks.nets.controlnet", name="ControlNet")
-get_timestep_embedding, has_get_timestep_embedding = optional_import(
-    "generative.networks.nets.diffusion_model_unet", name="get_timestep_embedding"
-)
 
-
-    from generative.networks.nets.controlnet import ControlNet as ControlNetType
-else:
-    ControlNetType = cast(type, ControlNet)
-
-
-class ControlNetMaisi(ControlNetType):
+class ControlNetMaisi(ControlNet):
     """
     Control network for diffusion models based on Zhang and Agrawala "Adding Conditional Control to Text-to-Image
     Diffusion Models" (https://arxiv.org/abs/2302.05543)
@@ -49,10 +40,12 @@ class ControlNetMaisi(ControlNetType):
         num_class_embeds: if specified (as an int), then this model will be class-conditional with `num_class_embeds`
             classes.
         upcast_attention: if True, upcast attention operations to full precision.
-        use_flash_attention: if True, use flash attention for a memory efficient attention mechanism.
         conditioning_embedding_in_channels: number of input channels for the conditioning embedding.
         conditioning_embedding_num_channels: number of channels for the blocks in the conditioning embedding.
         use_checkpointing: if True, use activation checkpointing to save memory.
+        include_fc: whether to include the final linear layer. Default to False.
+        use_combined_linear: whether to use a single linear layer for qkv projection, default to False.
+        use_flash_attention: if True, use flash attention for a memory efficient attention mechanism.
     """
 
     def __init__(
@@ -71,10 +64,12 @@ class ControlNetMaisi(ControlNetType):
         cross_attention_dim: int | None = None,
         num_class_embeds: int | None = None,
         upcast_attention: bool = False,
-        use_flash_attention: bool = False,
         conditioning_embedding_in_channels: int = 1,
-        conditioning_embedding_num_channels: Sequence[int]
+        conditioning_embedding_num_channels: Sequence[int] = (16, 32, 96, 256),
        use_checkpointing: bool = True,
+        include_fc: bool = False,
+        use_combined_linear: bool = False,
+        use_flash_attention: bool = False,
     ) -> None:
         super().__init__(
             spatial_dims,
@@ -91,9 +86,11 @@ class ControlNetMaisi(ControlNetType):
             cross_attention_dim,
             num_class_embeds,
             upcast_attention,
-            use_flash_attention,
             conditioning_embedding_in_channels,
             conditioning_embedding_num_channels,
+            include_fc,
+            use_combined_linear,
+            use_flash_attention,
         )
         self.use_checkpointing = use_checkpointing
 
@@ -105,7 +102,7 @@ class ControlNetMaisi(ControlNetType):
         conditioning_scale: float = 1.0,
         context: torch.Tensor | None = None,
         class_labels: torch.Tensor | None = None,
-    ) -> tuple[
+    ) -> tuple[list[torch.Tensor], torch.Tensor]:
         emb = self._prepare_time_and_class_embedding(x, timesteps, class_labels)
         h = self._apply_initial_convolution(x)
         if self.use_checkpointing:
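As with the autoencoder, ControlNetMaisi now subclasses monai's own ControlNet, and its forward return type is annotated as tuple[list[torch.Tensor], torch.Tensor] (presumably the down-block residuals plus the mid-block sample). A hedged sketch of the new import surface; the get_timestep_embedding call assumes the usual (timesteps, embedding_dim) signature:

import torch
from monai.networks.nets.controlnet import ControlNet
from monai.networks.nets.diffusion_model_unet import get_timestep_embedding
from monai.apps.generation.maisi.networks.controlnet_maisi import ControlNetMaisi

assert issubclass(ControlNetMaisi, ControlNet)  # previously an alias pulled from the "generative" package

# sinusoidal timestep embedding helper, now importable from monai core
emb = get_timestep_embedding(torch.tensor([0, 10, 500]), 128)
print(emb.shape)  # expected (3, 128)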
monai/apps/generation/maisi/networks/diffusion_model_unet_maisi.py
CHANGED
@@ -37,21 +37,15 @@ import torch
 from torch import nn
 
 from monai.networks.blocks import Convolution
-from monai.
-
-
-
-
-
-get_mid_block, has_get_mid_block = optional_import(
-    "generative.networks.nets.diffusion_model_unet", name="get_mid_block"
-)
-get_timestep_embedding, has_get_timestep_embedding = optional_import(
-    "generative.networks.nets.diffusion_model_unet", name="get_timestep_embedding"
+from monai.networks.nets.diffusion_model_unet import (
+    get_down_block,
+    get_mid_block,
+    get_timestep_embedding,
+    get_up_block,
+    zero_module,
 )
-
-
-zero_module, has_zero_module = optional_import("generative.networks.nets.diffusion_model_unet", name="zero_module")
+from monai.utils import ensure_tuple_rep
+from monai.utils.type_conversion import convert_to_tensor
 
 __all__ = ["DiffusionModelUNetMaisi"]
 
@@ -78,6 +72,8 @@ class DiffusionModelUNetMaisi(nn.Module):
         cross_attention_dim: Number of context dimensions to use.
         num_class_embeds: If specified (as an int), then this model will be class-conditional with `num_class_embeds` classes.
         upcast_attention: If True, upcast attention operations to full precision.
+        include_fc: whether to include the final linear layer. Default to False.
+        use_combined_linear: whether to use a single linear layer for qkv projection, default to False.
         use_flash_attention: If True, use flash attention for a memory efficient attention mechanism.
         dropout_cattn: If different from zero, this will be the dropout value for the cross-attention layers.
         include_top_region_index_input: If True, use top region index input.
@@ -102,6 +98,8 @@ class DiffusionModelUNetMaisi(nn.Module):
         cross_attention_dim: int | None = None,
         num_class_embeds: int | None = None,
         upcast_attention: bool = False,
+        include_fc: bool = False,
+        use_combined_linear: bool = False,
         use_flash_attention: bool = False,
         dropout_cattn: float = 0.0,
         include_top_region_index_input: bool = False,
@@ -152,9 +150,6 @@ class DiffusionModelUNetMaisi(nn.Module):
                 "`num_channels`."
             )
 
-        if use_flash_attention and not has_xformers:
-            raise ValueError("use_flash_attention is True but xformers is not installed.")
-
         if use_flash_attention is True and not torch.cuda.is_available():
             raise ValueError(
                 "torch.cuda.is_available() should be True but is False. Flash attention is only available for GPU."
@@ -210,7 +205,6 @@ class DiffusionModelUNetMaisi(nn.Module):
             input_channel = output_channel
             output_channel = num_channels[i]
             is_final_block = i == len(num_channels) - 1
-
             down_block = get_down_block(
                 spatial_dims=spatial_dims,
                 in_channels=input_channel,
@@ -227,6 +221,8 @@ class DiffusionModelUNetMaisi(nn.Module):
                 transformer_num_layers=transformer_num_layers,
                 cross_attention_dim=cross_attention_dim,
                 upcast_attention=upcast_attention,
+                include_fc=include_fc,
+                use_combined_linear=use_combined_linear,
                 use_flash_attention=use_flash_attention,
                 dropout_cattn=dropout_cattn,
             )
@@ -245,6 +241,8 @@ class DiffusionModelUNetMaisi(nn.Module):
             transformer_num_layers=transformer_num_layers,
             cross_attention_dim=cross_attention_dim,
             upcast_attention=upcast_attention,
+            include_fc=include_fc,
+            use_combined_linear=use_combined_linear,
             use_flash_attention=use_flash_attention,
             dropout_cattn=dropout_cattn,
         )
@@ -280,6 +278,8 @@ class DiffusionModelUNetMaisi(nn.Module):
                 transformer_num_layers=transformer_num_layers,
                 cross_attention_dim=cross_attention_dim,
                 upcast_attention=upcast_attention,
+                include_fc=include_fc,
+                use_combined_linear=use_combined_linear,
                 use_flash_attention=use_flash_attention,
                 dropout_cattn=dropout_cattn,
             )
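The UNet helpers (get_down_block, get_mid_block, get_up_block, get_timestep_embedding, zero_module) likewise come from monai core now, and the xformers check is dropped in favour of the CUDA-availability check alone. A quick sketch of one of the re-imported helpers; it assumes zero_module keeps its usual behaviour of zeroing a module's parameters in place and returning it:

import torch.nn as nn
from monai.networks.nets.diffusion_model_unet import zero_module

conv = zero_module(nn.Conv3d(8, 8, kernel_size=1))
# every parameter should now be zero (used for the UNet's zero-initialised convolutions)
assert all(float(p.abs().sum()) == 0.0 for p in conv.parameters())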
monai/apps/vista3d/inferer.py
ADDED
@@ -0,0 +1,177 @@
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import copy
+from collections.abc import Sequence
+from typing import Any
+
+import torch
+
+from monai.data.meta_tensor import MetaTensor
+from monai.utils import optional_import
+
+tqdm, _ = optional_import("tqdm", name="tqdm")
+
+__all__ = ["point_based_window_inferer"]
+
+
+def point_based_window_inferer(
+    inputs: torch.Tensor | MetaTensor,
+    roi_size: Sequence[int],
+    predictor: torch.nn.Module,
+    point_coords: torch.Tensor,
+    point_labels: torch.Tensor,
+    class_vector: torch.Tensor | None = None,
+    prompt_class: torch.Tensor | None = None,
+    prev_mask: torch.Tensor | MetaTensor | None = None,
+    point_start: int = 0,
+    center_only: bool = True,
+    margin: int = 5,
+    **kwargs: Any,
+) -> torch.Tensor:
+    """
+    Point-based window inferer that takes an input image, a set of points, and a model, and returns a segmented image.
+    The inferer algorithm crops the input image into patches that centered at the point sets, which is followed by
+    patch inference and average output stitching, and finally returns the segmented mask.
+
+    Args:
+        inputs: [1CHWD], input image to be processed.
+        roi_size: the spatial window size for inferences.
+            When its components have None or non-positives, the corresponding inputs dimension will be used.
+            if the components of the `roi_size` are non-positive values, the transform will use the
+            corresponding components of img size. For example, `roi_size=(32, -1)` will be adapted
+            to `(32, 64)` if the second spatial dimension size of img is `64`.
+        sw_batch_size: the batch size to run window slices.
+        predictor: the model. For vista3D, the output is [B, 1, H, W, D] which needs to be transposed to [1, B, H, W, D].
+            Add transpose=True in kwargs for vista3d.
+        point_coords: [B, N, 3]. Point coordinates for B foreground objects, each has N points.
+        point_labels: [B, N]. Point labels. 0/1 means negative/positive points for regular supported or zero-shot classes.
+            2/3 means negative/positive points for special supported classes (e.g. tumor, vessel).
+        class_vector: [B]. Used for class-head automatic segmentation. Can be None value.
+        prompt_class: [B]. The same as class_vector representing the point class and inform point head about
+            supported class or zeroshot, not used for automatic segmentation. If None, point head is default
+            to supported class segmentation.
+        prev_mask: [1, B, H, W, D]. The value is before sigmoid. An optional tensor of previously segmented masks.
+        point_start: only use points starting from this number. All points before this number is used to generate
+            prev_mask. This is used to avoid re-calculating the points in previous iterations if given prev_mask.
+        center_only: for each point, only crop the patch centered at this point. If false, crop 3 patches for each point.
+        margin: if center_only is false, this value is the distance between point to the patch boundary.
+    Returns:
+        stitched_output: [1, B, H, W, D]. The value is before sigmoid.
+    Notice: The function only supports SINGLE OBJECT INFERENCE with B=1.
+    """
+    if not point_coords.shape[0] == 1:
+        raise ValueError("Only supports single object point click.")
+    if not len(inputs.shape) == 5:
+        raise ValueError("Input image should be 5D.")
+    image, pad = _pad_previous_mask(copy.deepcopy(inputs), roi_size)
+    point_coords = point_coords + torch.tensor([pad[-2], pad[-4], pad[-6]]).to(point_coords.device)
+    prev_mask = _pad_previous_mask(copy.deepcopy(prev_mask), roi_size)[0] if prev_mask is not None else None
+    stitched_output = None
+    for p in point_coords[0][point_start:]:
+        lx_, rx_ = _get_window_idx(p[0], roi_size[0], image.shape[-3], center_only=center_only, margin=margin)
+        ly_, ry_ = _get_window_idx(p[1], roi_size[1], image.shape[-2], center_only=center_only, margin=margin)
+        lz_, rz_ = _get_window_idx(p[2], roi_size[2], image.shape[-1], center_only=center_only, margin=margin)
+        for i in range(len(lx_)):
+            for j in range(len(ly_)):
+                for k in range(len(lz_)):
+                    lx, rx, ly, ry, lz, rz = (lx_[i], rx_[i], ly_[j], ry_[j], lz_[k], rz_[k])
+                    unravel_slice = [
+                        slice(None),
+                        slice(None),
+                        slice(int(lx), int(rx)),
+                        slice(int(ly), int(ry)),
+                        slice(int(lz), int(rz)),
+                    ]
+                    batch_image = image[unravel_slice]
+                    output = predictor(
+                        batch_image,
+                        point_coords=point_coords,
+                        point_labels=point_labels,
+                        class_vector=class_vector,
+                        prompt_class=prompt_class,
+                        patch_coords=unravel_slice,
+                        prev_mask=prev_mask,
+                        **kwargs,
+                    )
+                    if stitched_output is None:
+                        stitched_output = torch.zeros(
+                            [1, output.shape[1], image.shape[-3], image.shape[-2], image.shape[-1]], device="cpu"
+                        )
+                        stitched_mask = torch.zeros(
+                            [1, output.shape[1], image.shape[-3], image.shape[-2], image.shape[-1]], device="cpu"
+                        )
+                    stitched_output[unravel_slice] += output.to("cpu")
+                    stitched_mask[unravel_slice] = 1
+    # if stitched_mask is 0, then NaN value
+    stitched_output = stitched_output / stitched_mask
+    # revert padding
+    stitched_output = stitched_output[
+        :, :, pad[4] : image.shape[-3] - pad[5], pad[2] : image.shape[-2] - pad[3], pad[0] : image.shape[-1] - pad[1]
+    ]
+    stitched_mask = stitched_mask[
+        :, :, pad[4] : image.shape[-3] - pad[5], pad[2] : image.shape[-2] - pad[3], pad[0] : image.shape[-1] - pad[1]
+    ]
+    if prev_mask is not None:
+        prev_mask = prev_mask[
+            :,
+            :,
+            pad[4] : image.shape[-3] - pad[5],
+            pad[2] : image.shape[-2] - pad[3],
+            pad[0] : image.shape[-1] - pad[1],
+        ]
+        prev_mask = prev_mask.to("cpu")  # type: ignore
+        # for un-calculated place, use previous mask
+        stitched_output[stitched_mask < 1] = prev_mask[stitched_mask < 1]
+    if isinstance(inputs, torch.Tensor):
+        inputs = MetaTensor(inputs)
+    if not hasattr(stitched_output, "meta"):
+        stitched_output = MetaTensor(stitched_output, affine=inputs.meta["affine"], meta=inputs.meta)
+    return stitched_output
+
+
+def _get_window_idx_c(p: int, roi: int, s: int) -> tuple[int, int]:
+    """Helper function to get the window index."""
+    if p - roi // 2 < 0:
+        left, right = 0, roi
+    elif p + roi // 2 > s:
+        left, right = s - roi, s
+    else:
+        left, right = int(p) - roi // 2, int(p) + roi // 2
+    return left, right
+
+
+def _get_window_idx(p: int, roi: int, s: int, center_only: bool = True, margin: int = 5) -> tuple[list[int], list[int]]:
+    """Get the window index."""
+    left, right = _get_window_idx_c(p, roi, s)
+    if center_only:
+        return [left], [right]
+    left_most = max(0, p - roi + margin)
+    right_most = min(s, p + roi - margin)
+    left_list = [left_most, right_most - roi, left]
+    right_list = [left_most + roi, right_most, right]
+    return left_list, right_list
+
+
+def _pad_previous_mask(
+    inputs: torch.Tensor | MetaTensor, roi_size: Sequence[int], padvalue: int = 0
+) -> tuple[torch.Tensor | MetaTensor, list[int]]:
+    """Helper function to pad inputs."""
+    pad_size = []
+    for k in range(len(inputs.shape) - 1, 1, -1):
+        diff = max(roi_size[k - 2] - inputs.shape[k], 0)
+        half = diff // 2
+        pad_size.extend([half, diff - half])
+    if any(pad_size):
+        inputs = torch.nn.functional.pad(inputs, pad=pad_size, mode="constant", value=padvalue)  # type: ignore
+    return inputs, pad_size
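A hedged usage sketch for the new inferer. The predictor below is a stand-in that just returns zero logits of the right shape; a real run would pass a VISTA-3D style model (with transpose=True in kwargs, per the docstring) and real click coordinates:

import torch
from monai.apps.vista3d.inferer import point_based_window_inferer

class _DummyPredictor(torch.nn.Module):
    def forward(self, x, point_coords=None, point_labels=None, **kwargs):
        # zero logits of shape [B=1, 1, *patch spatial dims], standing in for a real model
        return torch.zeros(1, 1, *x.shape[2:])

image = torch.rand(1, 1, 96, 96, 96)            # [1, C, H, W, D]
points = torch.tensor([[[48.0, 48.0, 48.0]]])   # [B=1, N=1, 3], one click near the centre
labels = torch.tensor([[1]])                    # [B=1, N=1], positive click

logits = point_based_window_inferer(
    inputs=image,
    roi_size=(64, 64, 64),
    predictor=_DummyPredictor(),
    point_coords=points,
    point_labels=labels,
)
print(logits.shape)  # [1, 1, 96, 96, 96]; values are pre-sigmoid and NaN outside the visited patches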