monai-weekly 1.5.dev2509__py3-none-any.whl → 1.5.dev2511__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monai/__init__.py +1 -1
- monai/_version.py +3 -3
- monai/apps/deepedit/interaction.py +1 -1
- monai/apps/deepgrow/interaction.py +1 -1
- monai/apps/detection/networks/retinanet_detector.py +1 -1
- monai/apps/detection/networks/retinanet_network.py +5 -5
- monai/apps/detection/utils/box_coder.py +2 -2
- monai/apps/generation/maisi/networks/autoencoderkl_maisi.py +4 -0
- monai/apps/mmars/mmars.py +1 -1
- monai/apps/reconstruction/networks/blocks/varnetblock.py +1 -1
- monai/bundle/scripts.py +3 -4
- monai/data/dataset.py +2 -9
- monai/data/utils.py +1 -1
- monai/data/video_dataset.py +1 -1
- monai/engines/evaluator.py +11 -16
- monai/engines/trainer.py +11 -17
- monai/engines/utils.py +1 -1
- monai/engines/workflow.py +2 -2
- monai/fl/client/monai_algo.py +1 -1
- monai/handlers/checkpoint_loader.py +1 -1
- monai/inferers/inferer.py +33 -13
- monai/inferers/merger.py +16 -13
- monai/losses/perceptual.py +1 -1
- monai/losses/sure_loss.py +1 -1
- monai/networks/blocks/crossattention.py +1 -6
- monai/networks/blocks/feature_pyramid_network.py +4 -2
- monai/networks/blocks/selfattention.py +1 -6
- monai/networks/blocks/upsample.py +3 -11
- monai/networks/layers/vector_quantizer.py +2 -2
- monai/networks/nets/hovernet.py +5 -4
- monai/networks/nets/resnet.py +2 -2
- monai/networks/nets/senet.py +1 -1
- monai/networks/nets/swin_unetr.py +46 -49
- monai/networks/nets/transchex.py +3 -2
- monai/networks/nets/vista3d.py +7 -7
- monai/networks/schedulers/__init__.py +1 -0
- monai/networks/schedulers/rectified_flow.py +322 -0
- monai/networks/utils.py +5 -4
- monai/transforms/intensity/array.py +1 -1
- monai/transforms/spatial/array.py +6 -6
- monai/utils/misc.py +1 -1
- monai/utils/state_cacher.py +1 -1
- {monai_weekly-1.5.dev2509.dist-info → monai_weekly-1.5.dev2511.dist-info}/METADATA +4 -3
- {monai_weekly-1.5.dev2509.dist-info → monai_weekly-1.5.dev2511.dist-info}/RECORD +66 -64
- {monai_weekly-1.5.dev2509.dist-info → monai_weekly-1.5.dev2511.dist-info}/WHEEL +1 -1
- tests/bundle/test_bundle_download.py +16 -6
- tests/config/test_cv2_dist.py +1 -2
- tests/inferers/test_controlnet_inferers.py +96 -32
- tests/inferers/test_diffusion_inferer.py +99 -1
- tests/inferers/test_latent_diffusion_inferer.py +217 -211
- tests/integration/test_integration_bundle_run.py +2 -4
- tests/integration/test_integration_classification_2d.py +1 -1
- tests/integration/test_integration_fast_train.py +2 -2
- tests/integration/test_integration_segmentation_3d.py +1 -1
- tests/metrics/test_compute_multiscalessim_metric.py +3 -3
- tests/metrics/test_surface_dice.py +3 -3
- tests/networks/nets/test_autoencoderkl.py +1 -1
- tests/networks/nets/test_controlnet.py +1 -1
- tests/networks/nets/test_diffusion_model_unet.py +1 -1
- tests/networks/nets/test_network_consistency.py +1 -1
- tests/networks/nets/test_swin_unetr.py +1 -1
- tests/networks/nets/test_transformer.py +1 -1
- tests/networks/schedulers/test_scheduler_rflow.py +105 -0
- tests/networks/test_save_state.py +1 -1
- {monai_weekly-1.5.dev2509.dist-info → monai_weekly-1.5.dev2511.dist-info}/LICENSE +0 -0
- {monai_weekly-1.5.dev2509.dist-info → monai_weekly-1.5.dev2511.dist-info}/top_level.txt +0 -0
monai/networks/blocks/feature_pyramid_network.py
CHANGED
@@ -54,7 +54,9 @@ from __future__ import annotations
 
 from collections import OrderedDict
 from collections.abc import Callable
+from typing import cast
 
+import torch
 import torch.nn.functional as F
 from torch import Tensor, nn
 
@@ -194,8 +196,8 @@ class FeaturePyramidNetwork(nn.Module):
         conv_type_: type[nn.Module] = Conv[Conv.CONV, spatial_dims]
         for m in self.modules():
             if isinstance(m, conv_type_):
-                nn.init.kaiming_uniform_(m.weight, a=1)
-                nn.init.constant_(m.bias, 0.0)
+                nn.init.kaiming_uniform_(cast(torch.Tensor, m.weight), a=1)
+                nn.init.constant_(cast(torch.Tensor, m.bias), 0.0)
 
         if extra_blocks is not None:
             if not isinstance(extra_blocks, ExtraFPNBlock):
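The `cast` calls above only change what the static type checker sees; they are no-ops at runtime. A minimal standalone sketch of the same pattern (illustrative only, not the MONAI module):

```python
from typing import cast

import torch
import torch.nn as nn


class TinyConvNet(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv = nn.Conv2d(3, 8, kernel_size=3, padding=1)
        conv_type_: type[nn.Module] = nn.Conv2d
        for m in self.modules():
            if isinstance(m, conv_type_):
                # cast() is a no-op at runtime; it only narrows the static type
                # of m.weight / m.bias so nn.init accepts them without warnings.
                nn.init.kaiming_uniform_(cast(torch.Tensor, m.weight), a=1)
                nn.init.constant_(cast(torch.Tensor, m.bias), 0.0)


if __name__ == "__main__":
    out = TinyConvNet()(torch.randn(1, 3, 8, 8))
    print(out.shape)  # torch.Size([1, 8, 8, 8])
```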
monai/networks/blocks/selfattention.py
CHANGED
@@ -18,7 +18,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 
 from monai.networks.layers.utils import get_rel_pos_embedding_layer
-from monai.utils import optional_import
+from monai.utils import optional_import
 
 Rearrange, _ = optional_import("einops.layers.torch", name="Rearrange")
 
@@ -90,11 +90,6 @@ class SABlock(nn.Module):
         if causal and sequence_length is None:
             raise ValueError("sequence_length is necessary for causal attention.")
 
-        if use_flash_attention and not pytorch_after(minor=13, major=1, patch=0):
-            raise ValueError(
-                "use_flash_attention is only supported for PyTorch versions >= 2.0."
-                "Upgrade your PyTorch or set the flag to False."
-            )
         if use_flash_attention and save_attn:
             raise ValueError(
                 "save_attn has been set to True, but use_flash_attention is also set"
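The removed guard rejected `use_flash_attention` on older PyTorch; the flash path in recent PyTorch is exposed through `torch.nn.functional.scaled_dot_product_attention`, available since 2.0. A standalone sketch of that call under those assumptions (not the MONAI block itself):

```python
import torch
import torch.nn.functional as F

# toy shapes: batch 2, 4 heads, sequence length 16, head dim 8
q = torch.randn(2, 4, 16, 8)
k = torch.randn(2, 4, 16, 8)
v = torch.randn(2, 4, 16, 8)

# fused attention entry point in PyTorch >= 2.0; falls back to a math
# implementation when no fused kernel applies (e.g. on CPU)
out = F.scaled_dot_product_attention(q, k, v, dropout_p=0.0, is_causal=False)
print(out.shape)  # torch.Size([2, 4, 16, 8])
```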
monai/networks/blocks/upsample.py
CHANGED
@@ -17,8 +17,8 @@ import torch
 import torch.nn as nn
 
 from monai.networks.layers.factories import Conv, Pad, Pool
-from monai.networks.utils import
-from monai.utils import InterpolateMode, UpsampleMode, ensure_tuple_rep, look_up_option
+from monai.networks.utils import icnr_init, pixelshuffle
+from monai.utils import InterpolateMode, UpsampleMode, ensure_tuple_rep, look_up_option
 
 __all__ = ["Upsample", "UpSample", "SubpixelUpsample", "Subpixelupsample", "SubpixelUpSample"]
 
@@ -164,15 +164,7 @@ class UpSample(nn.Sequential):
                 align_corners=align_corners,
             )
 
-
-            # https://github.com/pytorch/pytorch/issues/86679. This issue is solved in PyTorch 2.1
-            if pytorch_after(major=2, minor=1):
-                self.add_module("upsample_non_trainable", upsample)
-            else:
-                self.add_module(
-                    "upsample_non_trainable",
-                    CastTempType(initial_type=torch.bfloat16, temporary_type=torch.float32, submodule=upsample),
-                )
+            self.add_module("upsample_non_trainable", upsample)
             if post_conv:
                 self.add_module("postconv", post_conv)
         elif up_mode == UpsampleMode.PIXELSHUFFLE:
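The removed branch worked around pytorch/pytorch#86679 (fixed in PyTorch 2.1, per the removed comment) by wrapping the non-trainable upsample in a dtype-casting helper; it is now registered directly. A tiny standalone sketch of registering a module on an `nn.Sequential` subclass the same way (illustrative, not the MONAI class):

```python
import torch
import torch.nn as nn


class TinyUpsample(nn.Sequential):
    def __init__(self, scale_factor: float = 2.0) -> None:
        super().__init__()
        upsample = nn.Upsample(scale_factor=scale_factor, mode="nearest")
        # registered directly; no casting wrapper is needed on PyTorch >= 2.1
        self.add_module("upsample_non_trainable", upsample)


if __name__ == "__main__":
    x = torch.randn(1, 3, 8, 8)
    print(TinyUpsample()(x).shape)  # torch.Size([1, 3, 16, 16])
```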
monai/networks/layers/vector_quantizer.py
CHANGED
@@ -100,7 +100,7 @@ class EMAQuantizer(nn.Module):
             torch.Tensor: Quantization indices of shape [B,H,W,D,1]
 
         """
-        with torch.
+        with torch.autocast("cuda", enabled=False):
             encoding_indices_view = list(inputs.shape)
             del encoding_indices_view[1]
 
@@ -138,7 +138,7 @@ class EMAQuantizer(nn.Module):
         Returns:
             torch.Tensor: Quantize space representation of encoding_indices in channel first format.
         """
-        with torch.
+        with torch.autocast("cuda", enabled=False):
             embedding: torch.Tensor = (
                 self.embedding(embedding_indices).permute(self.quantization_permutation).contiguous()
             )
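Both quantizer hunks move to the `torch.autocast(device_type, enabled=False)` context manager, the current spelling for locally disabling mixed precision (the older `torch.cuda.amp.autocast(...)` form is deprecated in recent PyTorch). A minimal sketch of the pattern, assuming nothing about the MONAI class itself:

```python
import torch


def mse_in_full_precision(pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    # pick an autocast device type that exists on this machine
    device_type = "cuda" if torch.cuda.is_available() else "cpu"
    # disable autocast locally so this block always runs in full precision,
    # even when the surrounding code enabled mixed precision
    with torch.autocast(device_type, enabled=False):
        return torch.mean((pred.float() - target.float()) ** 2)


if __name__ == "__main__":
    a, b = torch.randn(4, 8), torch.randn(4, 8)
    print(mse_in_full_precision(a, b).item())
```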
monai/networks/nets/hovernet.py
CHANGED
@@ -633,9 +633,9 @@ def _remap_preact_resnet_model(model_url: str):
     # download the pretrained weights into torch hub's default dir
     weights_dir = os.path.join(torch.hub.get_dir(), "preact-resnet50.pth")
     download_url(model_url, fuzzy=True, filepath=weights_dir, progress=False)
-
-
-
+    map_location = None if torch.cuda.is_available() else torch.device("cpu")
+    state_dict = torch.load(weights_dir, map_location=map_location, weights_only=True)["desc"]
+
     for key in list(state_dict.keys()):
         new_key = None
         if pattern_conv0.match(key):
@@ -668,7 +668,8 @@ def _remap_standard_resnet_model(model_url: str, state_dict_key: str | None = No
     # download the pretrained weights into torch hub's default dir
     weights_dir = os.path.join(torch.hub.get_dir(), "resnet50.pth")
     download_url(model_url, fuzzy=True, filepath=weights_dir, progress=False)
-
+    map_location = None if torch.cuda.is_available() else torch.device("cpu")
+    state_dict = torch.load(weights_dir, map_location=map_location, weights_only=True)
     if state_dict_key is not None:
         state_dict = state_dict[state_dict_key]
 
monai/networks/nets/resnet.py
CHANGED
@@ -493,7 +493,7 @@ def _resnet(
     if isinstance(pretrained, str):
         if Path(pretrained).exists():
             logger.info(f"Loading weights from {pretrained}...")
-            model_state_dict = torch.load(pretrained, map_location=device)
+            model_state_dict = torch.load(pretrained, map_location=device, weights_only=True)
         else:
             # Throw error
             raise FileNotFoundError("The pretrained checkpoint file is not found")
@@ -665,7 +665,7 @@ def get_pretrained_resnet_medicalnet(resnet_depth: int, device: str = "cpu", dat
             raise EntryNotFoundError(
                 f"{filename} not found on {medicalnet_huggingface_repo_basename}{resnet_depth}"
             ) from None
-        checkpoint = torch.load(pretrained_path, map_location=torch.device(device))
+        checkpoint = torch.load(pretrained_path, map_location=torch.device(device), weights_only=True)
     else:
         raise NotImplementedError("Supported resnet_depth are: [10, 18, 34, 50, 101, 152, 200]")
     logger.info(f"{filename} downloaded")
monai/networks/nets/senet.py
CHANGED
@@ -302,7 +302,7 @@ def _load_state_dict(model: nn.Module, arch: str, progress: bool):
 
     if isinstance(model_url, dict):
         download_url(model_url["url"], filepath=model_url["filename"])
-        state_dict = torch.load(model_url["filename"], map_location=None)
+        state_dict = torch.load(model_url["filename"], map_location=None, weights_only=True)
     else:
        state_dict = load_state_dict_from_url(model_url, progress=progress)
     for key in list(state_dict.keys()):
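The checkpoint-loading changes in hovernet.py, resnet.py, senet.py (and transchex.py below) all add `weights_only=True` to `torch.load`, which restricts unpickling to tensors and other allow-listed types and matches the default behaviour of PyTorch 2.6+. A small self-contained sketch of the pattern, with a hypothetical file name:

```python
import torch

# save a plain state dict (tensors only), then load it back safely
state = {"layer.weight": torch.randn(4, 4), "layer.bias": torch.zeros(4)}
torch.save(state, "example_weights.pth")  # hypothetical path for illustration

map_location = None if torch.cuda.is_available() else torch.device("cpu")
# weights_only=True refuses to unpickle arbitrary Python objects
loaded = torch.load("example_weights.pth", map_location=map_location, weights_only=True)
print(sorted(loaded.keys()))
```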
monai/networks/nets/swin_unetr.py
CHANGED
@@ -272,53 +272,50 @@ class SwinUNETR(nn.Module):
         self.out = UnetOutBlock(spatial_dims=spatial_dims, in_channels=feature_size, out_channels=out_channels)
 
     def load_from(self, weights):
+        layers1_0: BasicLayer = self.swinViT.layers1[0]  # type: ignore[assignment]
+        layers2_0: BasicLayer = self.swinViT.layers2[0]  # type: ignore[assignment]
+        layers3_0: BasicLayer = self.swinViT.layers3[0]  # type: ignore[assignment]
+        layers4_0: BasicLayer = self.swinViT.layers4[0]  # type: ignore[assignment]
+        wstate = weights["state_dict"]
+
         with torch.no_grad():
-            self.swinViT.patch_embed.proj.weight.copy_(
-            self.swinViT.patch_embed.proj.bias.copy_(
-            for bname, block in
-                block.load_from(weights, n_block=bname, layer="layers1")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            )
-
-
-
-
-
-
-
-
-
-            weights
-
-
-
-
-
-
-            self.swinViT.layers4[0].downsample.reduction.weight.copy_(
-                weights["state_dict"]["module.layers4.0.downsample.reduction.weight"]
-            )
-            self.swinViT.layers4[0].downsample.norm.weight.copy_(
-                weights["state_dict"]["module.layers4.0.downsample.norm.weight"]
-            )
-            self.swinViT.layers4[0].downsample.norm.bias.copy_(
-                weights["state_dict"]["module.layers4.0.downsample.norm.bias"]
-            )
+            self.swinViT.patch_embed.proj.weight.copy_(wstate["module.patch_embed.proj.weight"])
+            self.swinViT.patch_embed.proj.bias.copy_(wstate["module.patch_embed.proj.bias"])
+            for bname, block in layers1_0.blocks.named_children():
+                block.load_from(weights, n_block=bname, layer="layers1")  # type: ignore[operator]
+
+            if layers1_0.downsample is not None:
+                d = layers1_0.downsample
+                d.reduction.weight.copy_(wstate["module.layers1.0.downsample.reduction.weight"])  # type: ignore
+                d.norm.weight.copy_(wstate["module.layers1.0.downsample.norm.weight"])  # type: ignore
+                d.norm.bias.copy_(wstate["module.layers1.0.downsample.norm.bias"])  # type: ignore
+
+            for bname, block in layers2_0.blocks.named_children():
+                block.load_from(weights, n_block=bname, layer="layers2")  # type: ignore[operator]
+
+            if layers2_0.downsample is not None:
+                d = layers2_0.downsample
+                d.reduction.weight.copy_(wstate["module.layers2.0.downsample.reduction.weight"])  # type: ignore
+                d.norm.weight.copy_(wstate["module.layers2.0.downsample.norm.weight"])  # type: ignore
+                d.norm.bias.copy_(wstate["module.layers2.0.downsample.norm.bias"])  # type: ignore
+
+            for bname, block in layers3_0.blocks.named_children():
+                block.load_from(weights, n_block=bname, layer="layers3")  # type: ignore[operator]
+
+            if layers3_0.downsample is not None:
+                d = layers3_0.downsample
+                d.reduction.weight.copy_(wstate["module.layers3.0.downsample.reduction.weight"])  # type: ignore
+                d.norm.weight.copy_(wstate["module.layers3.0.downsample.norm.weight"])  # type: ignore
+                d.norm.bias.copy_(wstate["module.layers3.0.downsample.norm.bias"])  # type: ignore
+
+            for bname, block in layers4_0.blocks.named_children():
+                block.load_from(weights, n_block=bname, layer="layers4")  # type: ignore[operator]
+
+            if layers4_0.downsample is not None:
+                d = layers4_0.downsample
+                d.reduction.weight.copy_(wstate["module.layers4.0.downsample.reduction.weight"])  # type: ignore
+                d.norm.weight.copy_(wstate["module.layers4.0.downsample.norm.weight"])  # type: ignore
+                d.norm.bias.copy_(wstate["module.layers4.0.downsample.norm.bias"])  # type: ignore
 
     @torch.jit.unused
     def _check_input_size(self, spatial_shape):
@@ -532,7 +529,7 @@ class WindowAttention(nn.Module):
         q = q * self.scale
         attn = q @ k.transpose(-2, -1)
         relative_position_bias = self.relative_position_bias_table[
-            self.relative_position_index.clone()[:n, :n].reshape(-1)
+            self.relative_position_index.clone()[:n, :n].reshape(-1)  # type: ignore[operator]
         ].reshape(n, n, -1)
         relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous()
         attn = attn + relative_position_bias.unsqueeze(0)
@@ -691,7 +688,7 @@ class SwinTransformerBlock(nn.Module):
             self.norm1.weight.copy_(weights["state_dict"][root + block_names[0]])
             self.norm1.bias.copy_(weights["state_dict"][root + block_names[1]])
             self.attn.relative_position_bias_table.copy_(weights["state_dict"][root + block_names[2]])
-            self.attn.relative_position_index.copy_(weights["state_dict"][root + block_names[3]])
+            self.attn.relative_position_index.copy_(weights["state_dict"][root + block_names[3]])  # type: ignore[operator]
             self.attn.qkv.weight.copy_(weights["state_dict"][root + block_names[4]])
             self.attn.qkv.bias.copy_(weights["state_dict"][root + block_names[5]])
             self.attn.proj.weight.copy_(weights["state_dict"][root + block_names[6]])
@@ -1118,7 +1115,7 @@ def filter_swinunetr(key, value):
     )
     ssl_weights_path = "./ssl_pretrained_weights.pth"
     download_url(resource, ssl_weights_path)
-    ssl_weights = torch.load(ssl_weights_path)["model"]
+    ssl_weights = torch.load(ssl_weights_path, weights_only=True)["model"]
 
     dst_dict, loaded, not_loaded = copy_model_state(model, ssl_weights, filter_func=filter_swinunetr)
 
monai/networks/nets/transchex.py
CHANGED
@@ -43,7 +43,7 @@ class BertPreTrainedModel(nn.Module):
 
     def init_bert_weights(self, module):
         if isinstance(module, (nn.Linear, nn.Embedding)):
-            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)  # type: ignore[union-attr,arg-type]
         elif isinstance(module, torch.nn.LayerNorm):
             module.bias.data.zero_()
             module.weight.data.fill_(1.0)
@@ -68,7 +68,8 @@ class BertPreTrainedModel(nn.Module):
         weights_path = cached_file(path_or_repo_id, filename, cache_dir=cache_dir)
         model = cls(num_language_layers, num_vision_layers, num_mixed_layers, bert_config, *inputs, **kwargs)
         if state_dict is None and not from_tf:
-
+            map_location = "cpu" if not torch.cuda.is_available() else None
+            state_dict = torch.load(weights_path, map_location=map_location, weights_only=True)
         if from_tf:
             return load_tf_weights_in_bert(model, weights_path)
         old_keys = []
monai/networks/nets/vista3d.py
CHANGED
@@ -315,7 +315,7 @@ class VISTA3D(nn.Module):
         """
         if auto_freeze != self.auto_freeze:
             if hasattr(self.image_encoder, "set_auto_grad"):
-                self.image_encoder.set_auto_grad(auto_freeze=auto_freeze, point_freeze=point_freeze)
+                self.image_encoder.set_auto_grad(auto_freeze=auto_freeze, point_freeze=point_freeze)  # type: ignore[operator]
             else:
                 for param in self.image_encoder.parameters():
                     param.requires_grad = (not auto_freeze) and (not point_freeze)
@@ -325,7 +325,7 @@ class VISTA3D(nn.Module):
 
         if point_freeze != self.point_freeze:
             if hasattr(self.image_encoder, "set_auto_grad"):
-                self.image_encoder.set_auto_grad(auto_freeze=auto_freeze, point_freeze=point_freeze)
+                self.image_encoder.set_auto_grad(auto_freeze=auto_freeze, point_freeze=point_freeze)  # type: ignore[operator]
             else:
                 for param in self.image_encoder.parameters():
                     param.requires_grad = (not auto_freeze) and (not point_freeze)
@@ -543,10 +543,10 @@ class PointMappingSAM(nn.Module):
         point_embedding = self.pe_layer.forward_with_coords(points, out_shape)  # type: ignore
         point_embedding[point_labels == -1] = 0.0
         point_embedding[point_labels == -1] += self.not_a_point_embed.weight
-        point_embedding[point_labels == 0] += self.point_embeddings[0].weight
-        point_embedding[point_labels == 1] += self.point_embeddings[1].weight
-        point_embedding[point_labels == 2] += self.point_embeddings[0].weight + self.special_class_embed.weight
-        point_embedding[point_labels == 3] += self.point_embeddings[1].weight + self.special_class_embed.weight
+        point_embedding[point_labels == 0] += self.point_embeddings[0].weight  # type: ignore[arg-type]
+        point_embedding[point_labels == 1] += self.point_embeddings[1].weight  # type: ignore[arg-type]
+        point_embedding[point_labels == 2] += self.point_embeddings[0].weight + self.special_class_embed.weight  # type: ignore[operator]
+        point_embedding[point_labels == 3] += self.point_embeddings[1].weight + self.special_class_embed.weight  # type: ignore[operator]
         output_tokens = self.mask_tokens.weight
 
         output_tokens = output_tokens.unsqueeze(0).expand(point_embedding.size(0), -1, -1)
@@ -884,7 +884,7 @@ class PositionEmbeddingRandom(nn.Module):
         coords = 2 * coords - 1
         # [bs=1,N=2,2] @ [2,128]
         # [bs=1, N=2, 128]
-        coords = coords @ self.positional_encoding_gaussian_matrix
+        coords = coords @ self.positional_encoding_gaussian_matrix  # type: ignore[operator]
         coords = 2 * np.pi * coords
         # outputs d_1 x ... x d_n x C shape
         # [bs=1, N=2, 128+128=256]
monai/networks/schedulers/rectified_flow.py
ADDED
@@ -0,0 +1,322 @@
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# =========================================================================
+# Adapted from https://github.com/hpcaitech/Open-Sora/blob/main/opensora/schedulers/rf/rectified_flow.py
+# which has the following license:
+# https://github.com/hpcaitech/Open-Sora/blob/main/LICENSE
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =========================================================================
+
+from __future__ import annotations
+
+from typing import Union
+
+import numpy as np
+import torch
+from torch.distributions import LogisticNormal
+
+from monai.utils import StrEnum
+
+from .ddpm import DDPMPredictionType
+from .scheduler import Scheduler
+
+
+class RFlowPredictionType(StrEnum):
+    """
+    Set of valid prediction type names for the RFlow scheduler's `prediction_type` argument.
+
+    v_prediction: velocity prediction, see section 2.4 https://imagen.research.google/video/paper.pdf
+    """
+
+    V_PREDICTION = DDPMPredictionType.V_PREDICTION
+
+
+def timestep_transform(
+    t, input_img_size_numel, base_img_size_numel=32 * 32 * 32, scale=1.0, num_train_timesteps=1000, spatial_dim=3
+):
+    """
+    Applies a transformation to the timestep based on image resolution scaling.
+
+    Args:
+        t (torch.Tensor): The original timestep(s).
+        input_img_size_numel (torch.Tensor): The input image's size (H * W * D).
+        base_img_size_numel (int): reference H*W*D size, usually smaller than input_img_size_numel.
+        scale (float): Scaling factor for the transformation.
+        num_train_timesteps (int): Total number of training timesteps.
+        spatial_dim (int): Number of spatial dimensions in the image.
+
+    Returns:
+        torch.Tensor: Transformed timestep(s).
+    """
+    t = t / num_train_timesteps
+    ratio_space = (input_img_size_numel / base_img_size_numel) ** (1.0 / spatial_dim)
+
+    ratio = ratio_space * scale
+    new_t = ratio * t / (1 + (ratio - 1) * t)
+
+    new_t = new_t * num_train_timesteps
+    return new_t
+
+
+class RFlowScheduler(Scheduler):
+    """
+    A rectified flow scheduler for guiding the diffusion process in a generative model.
+
+    Supports uniform and logit-normal sampling methods, timestep transformation for
+    different resolutions, and noise addition during diffusion.
+
+    Args:
+        num_train_timesteps (int): Total number of training timesteps.
+        use_discrete_timesteps (bool): Whether to use discrete timesteps.
+        sample_method (str): Training time step sampling method ('uniform' or 'logit-normal').
+        loc (float): Location parameter for logit-normal distribution, used only if sample_method='logit-normal'.
+        scale (float): Scale parameter for logit-normal distribution, used only if sample_method='logit-normal'.
+        use_timestep_transform (bool): Whether to apply timestep transformation.
+            If true, there will be more inference timesteps at early(noisy) stages for larger image volumes.
+        transform_scale (float): Scaling factor for timestep transformation, used only if use_timestep_transform=True.
+        steps_offset (int): Offset added to computed timesteps, used only if use_timestep_transform=True.
+        base_img_size_numel (int): Reference image volume size for scaling, used only if use_timestep_transform=True.
+        spatial_dim (int): 2 or 3, incidcating 2D or 3D images, used only if use_timestep_transform=True.
+
+    Example:
+
+        .. code-block:: python
+
+            # define a scheduler
+            noise_scheduler = RFlowScheduler(
+                num_train_timesteps = 1000,
+                use_discrete_timesteps = True,
+                sample_method = 'logit-normal',
+                use_timestep_transform = True,
+                base_img_size_numel = 32 * 32 * 32,
+                spatial_dim = 3
+            )
+
+            # during training
+            inputs = torch.ones(2,4,64,64,32)
+            noise = torch.randn_like(inputs)
+            timesteps = noise_scheduler.sample_timesteps(inputs)
+            noisy_inputs = noise_scheduler.add_noise(original_samples=inputs, noise=noise, timesteps=timesteps)
+            predicted_velocity = diffusion_unet(
+                x=noisy_inputs,
+                timesteps=timesteps
+            )
+            loss = loss_l1(predicted_velocity, (inputs - noise))
+
+            # during inference
+            noisy_inputs = torch.randn(2,4,64,64,32)
+            input_img_size_numel = torch.prod(torch.tensor(noisy_inputs.shape[-3:])
+            noise_scheduler.set_timesteps(
+                num_inference_steps=30, input_img_size_numel=input_img_size_numel)
+            )
+            all_next_timesteps = torch.cat(
+                (noise_scheduler.timesteps[1:], torch.tensor([0], dtype=noise_scheduler.timesteps.dtype))
+            )
+            for t, next_t in tqdm(
+                zip(noise_scheduler.timesteps, all_next_timesteps),
+                total=min(len(noise_scheduler.timesteps), len(all_next_timesteps)),
+            ):
+                predicted_velocity = diffusion_unet(
+                    x=noisy_inputs,
+                    timesteps=timesteps
+                )
+                noisy_inputs, _ = noise_scheduler.step(predicted_velocity, t, noisy_inputs, next_t)
+            final_output = noisy_inputs
+    """
+
+    def __init__(
+        self,
+        num_train_timesteps: int = 1000,
+        use_discrete_timesteps: bool = True,
+        sample_method: str = "uniform",
+        loc: float = 0.0,
+        scale: float = 1.0,
+        use_timestep_transform: bool = False,
+        transform_scale: float = 1.0,
+        steps_offset: int = 0,
+        base_img_size_numel: int = 32 * 32 * 32,
+        spatial_dim: int = 3,
+    ):
+        # rectified flow only accepts velocity prediction
+        self.prediction_type = RFlowPredictionType.V_PREDICTION
+
+        self.num_train_timesteps = num_train_timesteps
+        self.use_discrete_timesteps = use_discrete_timesteps
+        self.base_img_size_numel = base_img_size_numel
+        self.spatial_dim = spatial_dim
+
+        # sample method
+        if sample_method not in ["uniform", "logit-normal"]:
+            raise ValueError(
+                f"sample_method = {sample_method}, which has to be chosen from ['uniform', 'logit-normal']."
+            )
+        self.sample_method = sample_method
+        if sample_method == "logit-normal":
+            self.distribution = LogisticNormal(torch.tensor([loc]), torch.tensor([scale]))
+            self.sample_t = lambda x: self.distribution.sample((x.shape[0],))[:, 0].to(x.device)
+
+        # timestep transform
+        self.use_timestep_transform = use_timestep_transform
+        self.transform_scale = transform_scale
+        self.steps_offset = steps_offset
+
+    def add_noise(self, original_samples: torch.Tensor, noise: torch.Tensor, timesteps: torch.Tensor) -> torch.Tensor:
+        """
+        Add noise to the original samples.
+
+        Args:
+            original_samples: original samples
+            noise: noise to add to samples
+            timesteps: timesteps tensor with shape of (N,), indicating the timestep to be computed for each sample.
+
+        Returns:
+            noisy_samples: sample with added noise
+        """
+        timepoints: torch.Tensor = timesteps.float() / self.num_train_timesteps
+        timepoints = 1 - timepoints  # [1,1/1000]
+
+        # expand timepoint to noise shape
+        if noise.ndim == 5:
+            timepoints = timepoints[..., None, None, None, None].expand(-1, *noise.shape[1:])
+        elif noise.ndim == 4:
+            timepoints = timepoints[..., None, None, None].expand(-1, *noise.shape[1:])
+        else:
+            raise ValueError(f"noise tensor has to be 4D or 5D tensor, yet got shape of {noise.shape}")
+
+        noisy_samples: torch.Tensor = timepoints * original_samples + (1 - timepoints) * noise
+
+        return noisy_samples
+
+    def set_timesteps(
+        self,
+        num_inference_steps: int,
+        device: str | torch.device | None = None,
+        input_img_size_numel: int | None = None,
+    ) -> None:
+        """
+        Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.
+
+        Args:
+            num_inference_steps: number of diffusion steps used when generating samples with a pre-trained model.
+            device: target device to put the data.
+            input_img_size_numel: int, H*W*D of the image, used with self.use_timestep_transform is True.
+        """
+        if num_inference_steps > self.num_train_timesteps or num_inference_steps < 1:
+            raise ValueError(
+                f"`num_inference_steps`: {num_inference_steps} should be at least 1, "
+                "and cannot be larger than `self.num_train_timesteps`:"
+                f" {self.num_train_timesteps} as the unet model trained with this scheduler can only handle"
+                f" maximal {self.num_train_timesteps} timesteps."
+            )
+
+        self.num_inference_steps = num_inference_steps
+        # prepare timesteps
+        timesteps = [
+            (1.0 - i / self.num_inference_steps) * self.num_train_timesteps for i in range(self.num_inference_steps)
+        ]
+        if self.use_discrete_timesteps:
+            timesteps = [int(round(t)) for t in timesteps]
+        if self.use_timestep_transform:
+            timesteps = [
+                timestep_transform(
+                    t,
+                    input_img_size_numel=input_img_size_numel,
+                    base_img_size_numel=self.base_img_size_numel,
+                    num_train_timesteps=self.num_train_timesteps,
+                    spatial_dim=self.spatial_dim,
+                )
+                for t in timesteps
+            ]
+        timesteps_np = np.array(timesteps).astype(np.float16)
+        if self.use_discrete_timesteps:
+            timesteps_np = timesteps_np.astype(np.int64)
+        self.timesteps = torch.from_numpy(timesteps_np).to(device)
+        self.timesteps += self.steps_offset
+
+    def sample_timesteps(self, x_start):
+        """
+        Randomly samples training timesteps using the chosen sampling method.
+
+        Args:
+            x_start (torch.Tensor): The input tensor for sampling.
+
+        Returns:
+            torch.Tensor: Sampled timesteps.
+        """
+        if self.sample_method == "uniform":
+            t = torch.rand((x_start.shape[0],), device=x_start.device) * self.num_train_timesteps
+        elif self.sample_method == "logit-normal":
+            t = self.sample_t(x_start) * self.num_train_timesteps
+
+        if self.use_discrete_timesteps:
+            t = t.long()
+
+        if self.use_timestep_transform:
+            input_img_size_numel = torch.prod(torch.tensor(x_start.shape[2:]))
+            t = timestep_transform(
+                t,
+                input_img_size_numel=input_img_size_numel,
+                base_img_size_numel=self.base_img_size_numel,
+                num_train_timesteps=self.num_train_timesteps,
+                spatial_dim=len(x_start.shape) - 2,
+            )
+
+        return t
+
+    def step(
+        self, model_output: torch.Tensor, timestep: int, sample: torch.Tensor, next_timestep: Union[int, None] = None
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """
+        Predicts the next sample in the diffusion process.
+
+        Args:
+            model_output (torch.Tensor): Output from the trained diffusion model.
+            timestep (int): Current timestep in the diffusion chain.
+            sample (torch.Tensor): Current sample in the process.
+            next_timestep (Union[int, None]): Optional next timestep.
+
+        Returns:
+            tuple[torch.Tensor, torch.Tensor]: Predicted sample at the next step and additional info.
+        """
+        # Ensure num_inference_steps exists and is a valid integer
+        if not hasattr(self, "num_inference_steps") or not isinstance(self.num_inference_steps, int):
+            raise AttributeError(
+                "num_inference_steps is missing or not an integer in the class."
+                "Please run self.set_timesteps(num_inference_steps,device,input_img_size_numel) to set it."
+            )
+
+        v_pred = model_output
+
+        if next_timestep is not None:
+            next_timestep = int(next_timestep)
+            dt: float = (
+                float(timestep - next_timestep) / self.num_train_timesteps
+            )  # Now next_timestep is guaranteed to be int
+        else:
+            dt = (
+                1.0 / float(self.num_inference_steps) if self.num_inference_steps > 0 else 0.0
+            )  # Avoid division by zero
+
+        pred_post_sample = sample + v_pred * dt
+        pred_original_sample = sample + v_pred * timestep / self.num_train_timesteps
+
+        return pred_post_sample, pred_original_sample