PyPI - monai-weekly - Versions diffs - 1.4.dev2434__py3-none-any.whl → 1.4.dev2436__py3-none-any.whl - Mend

monai-weekly 1.4.dev2434__py3-none-any.whl → 1.4.dev2436__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

monai/__init__.py +44 -2
monai/_version.py +3 -3
monai/apps/vista3d/inferer.py +177 -0
monai/apps/vista3d/sampler.py +179 -0
monai/apps/vista3d/transforms.py +224 -0
monai/bundle/config_parser.py +5 -3
monai/bundle/scripts.py +2 -2
monai/bundle/utils.py +35 -1
monai/handlers/__init__.py +1 -0
monai/handlers/trt_handler.py +61 -0
monai/inferers/utils.py +1 -0
monai/metrics/generalized_dice.py +77 -48
monai/networks/__init__.py +2 -0
monai/networks/layers/filtering.py +6 -2
monai/networks/nets/swin_unetr.py +4 -4
monai/networks/nets/vista3d.py +53 -11
monai/networks/trt_compiler.py +569 -0
monai/networks/utils.py +225 -41
monai/transforms/__init__.py +24 -2
monai/transforms/io/array.py +58 -2
monai/transforms/io/dictionary.py +29 -2
monai/transforms/spatial/array.py +44 -0
monai/transforms/spatial/dictionary.py +61 -0
monai/transforms/spatial/functional.py +70 -1
monai/transforms/utility/array.py +153 -4
monai/transforms/utility/dictionary.py +105 -3
monai/transforms/utils.py +83 -10
monai/utils/__init__.py +1 -0
monai/utils/enums.py +1 -0
monai/utils/type_conversion.py +8 -0
{monai_weekly-1.4.dev2434.dist-info → monai_weekly-1.4.dev2436.dist-info}/METADATA +4 -1
{monai_weekly-1.4.dev2434.dist-info → monai_weekly-1.4.dev2436.dist-info}/RECORD +36 -31
{monai_weekly-1.4.dev2434.dist-info → monai_weekly-1.4.dev2436.dist-info}/WHEEL +1 -1
/monai/apps/{generation/maisi/utils → vista3d}/__init__.py +0 -0
{monai_weekly-1.4.dev2434.dist-info → monai_weekly-1.4.dev2436.dist-info}/LICENSE +0 -0
{monai_weekly-1.4.dev2434.dist-info → monai_weekly-1.4.dev2436.dist-info}/top_level.txt +0 -0

monai/bundle/scripts.py CHANGED Viewed

@@ -32,7 +32,7 @@ from monai._version import get_versions
 from monai.apps.utils import _basename, download_url, extractall, get_logger
 from monai.bundle.config_item import ConfigComponent
 from monai.bundle.config_parser import ConfigParser
-from monai.bundle.utils import DEFAULT_INFERENCE, DEFAULT_METADATA
+from monai.bundle.utils import DEFAULT_INFERENCE, DEFAULT_METADATA, merge_kv
 from monai.bundle.workflows import BundleWorkflow, ConfigWorkflow
 from monai.config import IgniteInfo, PathLike
 from monai.data import load_net_with_metadata, save_net_with_metadata
@@ -105,7 +105,7 @@ def update_kwargs(args: str | dict | None = None, ignore_none: bool = True, **kw
         if isinstance(v, dict) and isinstance(args_.get(k), dict):
             args_[k] = update_kwargs(args_[k], ignore_none, **v)
         else:
-            args_[k] = v
+            merge_kv(args_, k, v)
     return args_

monai/bundle/utils.py CHANGED Viewed

@@ -13,6 +13,7 @@ from __future__ import annotations
 import json
 import os
+import warnings
 import zipfile
 from typing import Any
@@ -21,12 +22,21 @@ from monai.utils import optional_import
 yaml, _ = optional_import("yaml")
-__all__ = ["ID_REF_KEY", "ID_SEP_KEY", "EXPR_KEY", "MACRO_KEY", "DEFAULT_MLFLOW_SETTINGS", "DEFAULT_EXP_MGMT_SETTINGS"]
+__all__ = [
+    "ID_REF_KEY",
+    "ID_SEP_KEY",
+    "EXPR_KEY",
+    "MACRO_KEY",
+    "MERGE_KEY",
+    "DEFAULT_MLFLOW_SETTINGS",
+    "DEFAULT_EXP_MGMT_SETTINGS",
+]
 ID_REF_KEY = "@"  # start of a reference to a ConfigItem
 ID_SEP_KEY = "::"  # separator for the ID of a ConfigItem
 EXPR_KEY = "$"  # start of a ConfigExpression
 MACRO_KEY = "%"  # start of a macro of a config
+MERGE_KEY = "+"  # prefix indicating merge instead of override in case of multiple configs.
 _conf_values = get_config_values()
@@ -233,3 +243,27 @@ def load_bundle_config(bundle_path: str, *config_names: str, **load_kw_args: Any
             parser.read_config(f=cdata)
     return parser
+def merge_kv(args: dict | Any, k: str, v: Any) -> None:
+    """
+    Update the `args` dict-like object with the key/value pair `k` and `v`.
+    A key that starts with `MERGE_KEY` requests a merge into the entry stored under the
+    un-prefixed key instead of an override: `dict` values are merged with `update`, `list`
+    values are concatenated with `extend`, both in place. If the un-prefixed key is not in
+    `args` yet, a warning is issued and `v` is stored under the un-prefixed key.
+    A key without the prefix is assigned as-is.
+    Args:
+        args: dict-like target, modified in place.
+        k: key to set, optionally prefixed with `MERGE_KEY`.
+        v: value to assign or to merge into the existing entry.
+    Raises:
+        ValueError: when a merge is requested and the existing and the new value are not
+            both `dict` or both `list`.
+    """
+    if not k.startswith(MERGE_KEY):
+        args[k] = v
+        return
+    # drop the merge prefix to obtain the key the value is actually stored under
+    target_key = k[len(MERGE_KEY) :]
+    if target_key not in args:
+        warnings.warn(f"Can't merge entry ['{k}'], '{target_key}' is not in target dict - copying instead.")
+        args[target_key] = v
+        return
+    if isinstance(v, dict) and isinstance(args[target_key], dict):
+        args[target_key].update(v)
+    elif isinstance(v, list) and isinstance(args[target_key], list):
+        args[target_key].extend(v)
+    else:
+        # raise the error directly; it used to be wrapped in a second ValueError instance
+        raise ValueError(f"config must be dict or list for key `{k}`, but got {type(v)}: {v}.")

monai/handlers/__init__.py CHANGED Viewed

@@ -40,5 +40,6 @@ from .smartcache_handler import SmartCacheHandler
 from .stats_handler import StatsHandler
 from .surface_distance import SurfaceDistance
 from .tensorboard_handlers import TensorBoardHandler, TensorBoardImageHandler, TensorBoardStatsHandler
+from .trt_handler import TrtHandler
 from .utils import from_engine, ignore_data, stopping_fn_from_loss, stopping_fn_from_metric, write_metrics_reports
 from .validation_handler import ValidationHandler

monai/handlers/trt_handler.py ADDED Viewed

@@ -0,0 +1,61 @@
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from monai.config import IgniteInfo
+from monai.networks import trt_compile
+from monai.utils import min_version, optional_import
+Events, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Events")
+if TYPE_CHECKING:
+    from ignite.engine import Engine
+else:
+    Engine, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Engine")
+class TrtHandler:
+    """
+    TrtHandler acts as an Ignite handler to apply TRT acceleration to the model.
+    The handler registers itself for `Events.STARTED` and calls `trt_compile()` when invoked.
+    Usage example::
+        handler = TrtHandler(model=model, base_path="/test/checkpoint.pt", args={"precision": "fp16"})
+        handler.attach(engine)
+        engine.run()
+    """
+    def __init__(self, model, base_path, args=None, submodule=None):
+        """
+        Args:
+            model: the model handed to trt_compile() when the handler is invoked.
+            base_path: TRT path basename. TRT plan(s) saved to "base_path[.submodule].plan"
+            args: passed to trt_compile(). See trt_compile() for details.
+            submodule: Hierarchical ids of submodules to convert, e.g. 'image_decoder.decoder'
+        """
+        self.model = model
+        self.base_path = base_path
+        self.args = args
+        self.submodule = submodule
+    def attach(self, engine: Engine) -> None:
+        """
+        Register this handler on `engine` for the `Events.STARTED` event.
+        Args:
+            engine: Ignite Engine, it can be a trainer, validator or evaluator.
+        """
+        # `self.logger` is only assigned here, so `attach` has to run before the handler is invoked.
+        self.logger = engine.logger
+        engine.add_event_handler(Events.STARTED, self)
+    def __call__(self, engine: Engine) -> None:
+        """
+        Call trt_compile() with the stored model, base path, args, submodule and the engine logger.
+        Args:
+            engine: Ignite Engine, it can be a trainer, validator or evaluator.
+        """
+        trt_compile(self.model, self.base_path, args=self.args, submodule=self.submodule, logger=self.logger)

monai/inferers/utils.py CHANGED Viewed

@@ -300,6 +300,7 @@ def sliding_window_inference(
     # remove padding if image_size smaller than roi_size
     if any(pad_size):
+        kwargs.update({"pad_size": pad_size})
         for ss, output_i in enumerate(output_image_list):
             zoom_scale = [_shape_d / _roi_size_d for _shape_d, _roi_size_d in zip(output_i.shape[2:], roi_size)]
             final_slicing: list[slice] = []

monai/metrics/generalized_dice.py CHANGED Viewed

@@ -14,34 +14,47 @@ from __future__ import annotations
 import torch
 from monai.metrics.utils import do_metric_reduction, ignore_background
-from monai.utils import MetricReduction, Weight, look_up_option
+from monai.utils import MetricReduction, Weight, deprecated_arg, deprecated_arg_default, look_up_option
 from .metric import CumulativeIterationMetric
 class GeneralizedDiceScore(CumulativeIterationMetric):
-    """Compute the Generalized Dice Score metric between tensors, as the complement of the Generalized Dice Loss defined in:
+    """
+    Compute the Generalized Dice Score metric between tensors.
+    This metric is the complement of the Generalized Dice Loss defined in:
     Sudre, C. et. al. (2017) Generalised Dice overlap as a deep learning
-        loss function for highly unbalanced segmentations. DLMIA 2017.
+    loss function for highly unbalanced segmentations. DLMIA 2017.
-    The inputs `y_pred` and `y` are expected to be one-hot, binarized channel-first
-    or batch-first tensors, i.e., CHW[D] or BCHW[D].
+    The inputs `y_pred` and `y` are expected to be one-hot, binarized batch-first tensors, i.e., NCHW[D].
     Example of the typical execution steps of this metric class follows :py:class:`monai.metrics.metric.Cumulative`.
     Args:
-        include_background (bool, optional): whether to include the background class (assumed to be in channel 0), in the
+        include_background: Whether to include the background class (assumed to be in channel 0) in the
             score computation. Defaults to True.
-        reduction (str, optional): define mode of reduction to the metrics. Available reduction modes:
-            {``"none"``, ``"mean_batch"``, ``"sum_batch"``}. Default to ``"mean_batch"``. If "none", will not do reduction.
-        weight_type (Union[Weight, str], optional): {``"square"``, ``"simple"``, ``"uniform"``}. Type of function to transform
+        reduction: Define mode of reduction to the metrics. Available reduction modes:
+            {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
+            ``"mean_channel"``, ``"sum_channel"``}, default to ``"mean"``. if "none", will not do reduction.
+        weight_type: {``"square"``, ``"simple"``, ``"uniform"``}. Type of function to transform
             ground truth volume into a weight factor. Defaults to ``"square"``.
     Raises:
-        ValueError: when the `weight_type` is not one of {``"none"``, ``"mean"``, ``"sum"``}.
+        ValueError: When the `reduction` is not one of MetricReduction enum.
     """
+    @deprecated_arg_default(
+        "reduction",
+        old_default=MetricReduction.MEAN_BATCH,
+        new_default=MetricReduction.MEAN,
+        since="1.4.0",
+        replaced="1.5.0",
+        msg_suffix=(
+            "Old versions computed `mean` when `mean_batch` was provided due to bug in reduction， "
+            "If you want to retain the old behavior (calculating the mean), please explicitly set the parameter to 'mean'."
+        ),
+    )
     def __init__(
         self,
         include_background: bool = True,
@@ -50,79 +63,90 @@ class GeneralizedDiceScore(CumulativeIterationMetric):
     ) -> None:
         super().__init__()
         self.include_background = include_background
-        reduction_options = [
-            "none",
-            "mean_batch",
-            "sum_batch",
-            MetricReduction.NONE,
-            MetricReduction.MEAN_BATCH,
-            MetricReduction.SUM_BATCH,
-        ]
-        self.reduction = reduction
-        if self.reduction not in reduction_options:
-            raise ValueError(f"reduction must be one of {reduction_options}")
+        self.reduction = look_up_option(reduction, MetricReduction)
         self.weight_type = look_up_option(weight_type, Weight)
+        self.sum_over_classes = self.reduction in {
+            MetricReduction.SUM,
+            MetricReduction.MEAN,
+            MetricReduction.MEAN_CHANNEL,
+            MetricReduction.SUM_CHANNEL,
+        }
     def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:  # type: ignore[override]
-        """Computes the Generalized Dice Score and returns a tensor with its per image values.
+        """
+        Computes the Generalized Dice Score and returns a tensor with its per image values.
         Args:
-            y_pred (torch.Tensor): binarized segmentation model output. It must be in one-hot format and in the NCHW[D] format,
+            y_pred (torch.Tensor): Binarized segmentation model output. It must be in one-hot format and in the NCHW[D] format,
                 where N is the batch dimension, C is the channel dimension, and the remaining are the spatial dimensions.
-            y (torch.Tensor): binarized ground-truth. It must be in one-hot format and have the same shape as `y_pred`.
+            y (torch.Tensor): Binarized ground-truth. It must be in one-hot format and have the same shape as `y_pred`.
+        Returns:
+            torch.Tensor: Generalized Dice Score averaged across batch and class
         Raises:
-            ValueError: if `y_pred` and `y` have less than 3 dimensions, or `y_pred` and `y` don't have the same shape.
+            ValueError: If `y_pred` and `y` have less than 3 dimensions, or `y_pred` and `y` don't have the same shape.
         """
         return compute_generalized_dice(
-            y_pred=y_pred, y=y, include_background=self.include_background, weight_type=self.weight_type
+            y_pred=y_pred,
+            y=y,
+            include_background=self.include_background,
+            weight_type=self.weight_type,
+            sum_over_classes=self.sum_over_classes,
         )
+    @deprecated_arg(
+        "reduction",
+        since="1.3.3",
+        removed="1.7.0",
+        msg_suffix="Reduction will be ignored. Set reduction during init. as gen.dice needs it during compute",
+    )
     def aggregate(self, reduction: MetricReduction | str | None = None) -> torch.Tensor:
         """
         Execute reduction logic for the output of `compute_generalized_dice`.
-        Args:
-            reduction (Union[MetricReduction, str, None], optional): define mode of reduction to the metrics.
-                Available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``}.
-                Defaults to ``"mean"``. If "none", will not do reduction.
+        Returns:
+            torch.Tensor: Aggregated metric value.
+        Raises:
+            ValueError: If the data to aggregate is not a PyTorch Tensor.
         """
         data = self.get_buffer()
         if not isinstance(data, torch.Tensor):
             raise ValueError("The data to aggregate must be a PyTorch Tensor.")
-        # Validate reduction argument if specified
-        if reduction is not None:
-            reduction_options = ["none", "mean", "sum", "mean_batch", "sum_batch"]
-            if reduction not in reduction_options:
-                raise ValueError(f"reduction must be one of {reduction_options}")
         # Do metric reduction and return
-        f, _ = do_metric_reduction(data, reduction or self.reduction)
+        f, _ = do_metric_reduction(data, self.reduction)
         return f
 def compute_generalized_dice(
-    y_pred: torch.Tensor, y: torch.Tensor, include_background: bool = True, weight_type: Weight | str = Weight.SQUARE
+    y_pred: torch.Tensor,
+    y: torch.Tensor,
+    include_background: bool = True,
+    weight_type: Weight | str = Weight.SQUARE,
+    sum_over_classes: bool = False,
 ) -> torch.Tensor:
-    """Computes the Generalized Dice Score and returns a tensor with its per image values.
+    """
+    Computes the Generalized Dice Score and returns a tensor with its per image values.
     Args:
-        y_pred (torch.Tensor): binarized segmentation model output. It should be binarized, in one-hot format
+        y_pred (torch.Tensor): Binarized segmentation model output. It should be binarized, in one-hot format
             and in the NCHW[D] format, where N is the batch dimension, C is the channel dimension, and the
             remaining are the spatial dimensions.
-        y (torch.Tensor): binarized ground-truth. It should be binarized, in one-hot format and have the same shape as `y_pred`.
-        include_background (bool, optional): whether to include score computation on the first channel of the
+        y (torch.Tensor): Binarized ground-truth. It should be binarized, in one-hot format and have the same shape as `y_pred`.
+        include_background: Whether to include score computation on the first channel of the
             predicted output. Defaults to True.
         weight_type (Union[Weight, str], optional): {``"square"``, ``"simple"``, ``"uniform"``}. Type of function to
             transform ground truth volume into a weight factor. Defaults to ``"square"``.
+        sum_over_labels (bool): Whether to sum the numerator and denominator across all labels before the final computation.
     Returns:
-        torch.Tensor: per batch and per class Generalized Dice Score, i.e., with the shape [batch_size, num_classes].
+        torch.Tensor: Per batch and per class Generalized Dice Score, i.e., with the shape [batch_size, num_classes].
     Raises:
-        ValueError: if `y_pred` or `y` are not PyTorch tensors, if `y_pred` and `y` have less than three dimensions,
+        ValueError: If `y_pred` or `y` are not PyTorch tensors, if `y_pred` and `y` have less than three dimensions,
             or `y_pred` and `y` don't have the same shape.
     """
     # Ensure tensors have at least 3 dimensions and have the same shape
@@ -158,16 +182,21 @@ def compute_generalized_dice(
         b[infs] = 0
         b[infs] = torch.max(b)
-    # Compute the weighted numerator and denominator, summing along the class axis
-    numer = 2.0 * (intersection * w).sum(dim=1)
-    denom = (denominator * w).sum(dim=1)
+    # Compute the weighted numerator and denominator, summing along the class axis when sum_over_classes is True
+    if sum_over_classes:
+        numer = 2.0 * (intersection * w).sum(dim=1, keepdim=True)
+        denom = (denominator * w).sum(dim=1, keepdim=True)
+        y_pred_o = y_pred_o.sum(dim=-1, keepdim=True)
+    else:
+        numer = 2.0 * (intersection * w)
+        denom = denominator * w
+        y_pred_o = y_pred_o
     # Compute the score
     generalized_dice_score = numer / denom
     # Handle zero division. Where denom == 0 and the prediction volume is 0, score is 1.
     # Where denom == 0 but the prediction volume is not 0, score is 0
-    y_pred_o = y_pred_o.sum(dim=-1)
     denom_zeros = denom == 0
     generalized_dice_score[denom_zeros] = torch.where(
         (y_pred_o == 0)[denom_zeros],

monai/networks/__init__.py CHANGED Viewed

@@ -11,7 +11,9 @@
 from __future__ import annotations
+from .trt_compiler import trt_compile
 from .utils import (
+    add_casts_around_norms,
     convert_to_onnx,
     convert_to_torchscript,
     convert_to_trt,

monai/networks/layers/filtering.py CHANGED Viewed

@@ -51,6 +51,8 @@ class BilateralFilter(torch.autograd.Function):
         ctx.cs = color_sigma
         ctx.fa = fast_approx
         output_data = _C.bilateral_filter(input, spatial_sigma, color_sigma, fast_approx)
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
         return output_data
     @staticmethod
@@ -139,7 +141,8 @@ class TrainableBilateralFilterFunction(torch.autograd.Function):
             do_dsig_y,
             do_dsig_z,
         )
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
         return output_tensor
     @staticmethod
@@ -301,7 +304,8 @@ class TrainableJointBilateralFilterFunction(torch.autograd.Function):
             do_dsig_z,
             guidance_img,
         )
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
         return output_tensor
     @staticmethod

monai/networks/nets/swin_unetr.py CHANGED Viewed

@@ -320,7 +320,7 @@ class SwinUNETR(nn.Module):
             )
     def forward(self, x_in):
-        if not torch.jit.is_scripting():
+        if not torch.jit.is_scripting() and not torch.jit.is_tracing():
             self._check_input_size(x_in.shape[2:])
         hidden_states_out = self.swinViT(x_in, self.normalize)
         enc0 = self.encoder1(x_in)
@@ -1046,14 +1046,14 @@ class SwinTransformer(nn.Module):
     def proj_out(self, x, normalize=False):
         if normalize:
-            x_shape = x.size()
+            x_shape = x.shape
+            # Force trace() to generate a constant by casting to int
+            ch = int(x_shape[1])
             if len(x_shape) == 5:
-                n, ch, d, h, w = x_shape
                 x = rearrange(x, "n c d h w -> n d h w c")
                 x = F.layer_norm(x, [ch])
                 x = rearrange(x, "n d h w c -> n c d h w")
             elif len(x_shape) == 4:
-                n, ch, h, w = x_shape
                 x = rearrange(x, "n c h w -> n h w c")
                 x = F.layer_norm(x, [ch])
                 x = rearrange(x, "n h w c -> n c h w")

monai/networks/nets/vista3d.py CHANGED Viewed

@@ -23,7 +23,7 @@ import monai
 from monai.networks.blocks import MLPBlock, UnetrBasicBlock
 from monai.networks.nets import SegResNetDS2
 from monai.transforms.utils import convert_points_to_disc
-from monai.transforms.utils import get_largest_connected_component_mask_point as lcc
+from monai.transforms.utils import keep_merge_components_with_points as lcc
 from monai.transforms.utils import sample_points_from_label
 from monai.utils import optional_import, unsqueeze_left, unsqueeze_right
@@ -78,6 +78,35 @@ class VISTA3D(nn.Module):
         self.NINF_VALUE = -9999
         self.PINF_VALUE = 9999
+    def update_slidingwindow_padding(
+        self,
+        pad_size: list | None,
+        labels: torch.Tensor | None,
+        prev_mask: torch.Tensor | None,
+        point_coords: torch.Tensor | None,
+    ):
+        """
+        Image has been padded by sliding window inferer.
+        The related padding needs to be performed outside of the sliding window inferer.
+        Args:
+            pad_size: padding size passed from sliding window inferer. When None, the inputs are returned unchanged.
+            labels: image label ground truth.
+            prev_mask: previous segmentation mask.
+            point_coords: point click coordinates.
+        Returns:
+            the tuple `(labels, prev_mask, point_coords)`; entries that are not None have been
+            padded (`labels`, `prev_mask`) or offset (`point_coords`) according to `pad_size`.
+        """
+        if pad_size is None:
+            return labels, prev_mask, point_coords
+        if labels is not None:
+            # apply the inferer's padding to the labels, filling with constant 0
+            labels = F.pad(labels, pad=pad_size, mode="constant", value=0)
+        if prev_mask is not None:
+            # same constant-0 padding for the previous mask
+            prev_mask = F.pad(prev_mask, pad=pad_size, mode="constant", value=0)
+        if point_coords is not None:
+            # offset the clicks by entries -2, -4 and -6 of `pad_size`; with the `F.pad` layout
+            # (last dimension first, (before, after) pairs) these would be the leading pads of the
+            # three spatial axes - assumes `pad_size` has six entries, TODO confirm
+            point_coords = point_coords + torch.tensor(
+                [pad_size[-2], pad_size[-4], pad_size[-6]], device=point_coords.device
+            )
+        return labels, prev_mask, point_coords
     def get_foreground_class_count(self, class_vector: torch.Tensor | None, point_coords: torch.Tensor | None) -> int:
         """Get number of foreground classes based on class and point prompt."""
         if class_vector is None:
@@ -307,16 +336,17 @@ class VISTA3D(nn.Module):
     def forward(
         self,
         input_images: torch.Tensor,
+        patch_coords: list[Sequence[slice]] | None = None,
         point_coords: torch.Tensor | None = None,
         point_labels: torch.Tensor | None = None,
         class_vector: torch.Tensor | None = None,
         prompt_class: torch.Tensor | None = None,
-        patch_coords: Sequence[slice] | None = None,
         labels: torch.Tensor | None = None,
         label_set: Sequence[int] | None = None,
         prev_mask: torch.Tensor | None = None,
         radius: int | None = None,
         val_point_sampler: Callable | None = None,
+        transpose: bool = False,
         **kwargs,
     ):
         """
@@ -329,13 +359,17 @@ class VISTA3D(nn.Module):
             point_coords: [B, N, 3]
             point_labels: [B, N], -1 represents padding. 0/1 means negative/positive points for regular class.
                 2/3 means negative/postive ponits for special supported class like tumor.
-            class_vector: [B, 1], the global class index
+            class_vector: [B, 1], the global class index.
             prompt_class: [B, 1], the global class index. This value is associated with point_coords to identify if
                 the points are for zero-shot or supported class. When class_vector and point_coords are both
                 provided, prompt_class is the same as class_vector. For prompt_class[b] > 512, point_coords[b]
                 will be considered novel class.
-            patch_coords: a sequence of the python slice objects representing the patch coordinates during sliding window inference.
-                This value is passed from sliding_window_inferer. This is an indicator for training phase or validation phase.
+            patch_coords: a list of sequence of the python slice objects representing the patch coordinates during sliding window
+                inference. This value is passed from sliding_window_inferer.
+                This is an indicator for training phase or validation phase.
+                Notice for sliding window batch size > 1 (only supported by automatic segmentation), patch_coords will inlcude
+                coordinates of multiple patches. If point prompts are included, the batch size can only be one and all the
+                functions using patch_coords will by default use patch_coords[0].
             labels: [1, 1, H, W, D], the groundtruth label tensor, only used for point-only evaluation
             label_set: the label index matching the indexes in labels. If labels are mapped to global index using RelabelID,
                 this label_set should be global mapped index. If labels are not mapped to global index, e.g. in zero-shot
@@ -346,8 +380,12 @@ class VISTA3D(nn.Module):
             radius: single float value controling the gaussian blur when combining point and auto results.
                 The gaussian combine is not used in VISTA3D training but might be useful for finetuning purposes.
             val_point_sampler: function used to sample points from labels. This is only used for point-only evaluation.
+            transpose: bool. If true, the output will be transposed to be [1, B, H, W, D]. Required to be true if calling from
+                sliding window inferer/point inferer.
         """
+        labels, prev_mask, point_coords = self.update_slidingwindow_padding(
+            kwargs.get("pad_size", None), labels, prev_mask, point_coords
+        )
         image_size = input_images.shape[-3:]
         device = input_images.device
         if point_coords is None and class_vector is None:
@@ -361,14 +399,14 @@ class VISTA3D(nn.Module):
                 if val_point_sampler is None:
                     # TODO: think about how to refactor this part.
                     val_point_sampler = self.sample_points_patch_val
-                point_coords, point_labels, prompt_class = val_point_sampler(labels, patch_coords, label_set)
+                point_coords, point_labels, prompt_class = val_point_sampler(labels, patch_coords[0], label_set)
                 if prompt_class[0].item() == 0:  # type: ignore
                     point_labels[0] = -1  # type: ignore
                 labels, prev_mask = None, None
             elif point_coords is not None:
                 # If not performing patch-based point only validation, use user provided click points for inference.
                 # the point clicks is in original image space, convert it to current patch-coordinate space.
-                point_coords, point_labels = self.update_point_to_patch(patch_coords, point_coords, point_labels)  # type: ignore
+                point_coords, point_labels = self.update_point_to_patch(patch_coords[0], point_coords, point_labels)  # type: ignore
         if point_coords is not None and point_labels is not None:
             # remove points that used for padding purposes (point_label = -1)
@@ -387,7 +425,10 @@ class VISTA3D(nn.Module):
                     point_coords, point_labels = None, None
         if point_coords is None and class_vector is None:
-            return self.NINF_VALUE + torch.zeros([bs, 1, *image_size], device=device)
+            logits = self.NINF_VALUE + torch.zeros([bs, 1, *image_size], device=device)
+            if transpose:
+                logits = logits.transpose(1, 0)
+            return logits
         if self.image_embeddings is not None and kwargs.get("keep_cache", False) and class_vector is None:
             out, out_auto = self.image_embeddings, None
@@ -418,15 +459,16 @@ class VISTA3D(nn.Module):
             logits[mapping_index] = self.point_head(out, point_coords, point_labels, class_vector=prompt_class)
             if prev_mask is not None and patch_coords is not None:
                 logits = self.connected_components_combine(
-                    prev_mask[patch_coords].transpose(1, 0).to(logits.device),
+                    prev_mask[patch_coords[0]].transpose(1, 0).to(logits.device),
                     logits[mapping_index],
                     point_coords,  # type: ignore
                     point_labels,  # type: ignore
                     mapping_index,
                 )
         if kwargs.get("keep_cache", False) and class_vector is None:
             self.image_embeddings = out.detach()
+        if transpose:
+            logits = logits.transpose(1, 0)
         return logits

monai-weekly 1.4.dev2434__py3-none-any.whl → 1.4.dev2436__py3-none-any.whl

monai-weekly 1.4.dev2434__py3-none-any.whl → 1.4.dev2436__py3-none-any.whl