PyPI - diffusers - Versions diffs - 0.27.2__py3-none-any.whl → 0.28.0__py3-none-any.whl - Mend

diffusers 0.27.2py3-none-any.whl → 0.28.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (270) hide show

diffusers/schedulers/scheduling_sde_ve.py CHANGED Viewed

@@ -32,15 +32,15 @@ class SdeVeOutput(BaseOutput):
     Output class for the scheduler's `step` function output.
     Args:
-        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
             Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
             denoising loop.
-        prev_sample_mean (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        prev_sample_mean (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
             Mean averaged `prev_sample` over previous timesteps.
     """
-    prev_sample: torch.FloatTensor
-    prev_sample_mean: torch.FloatTensor
+    prev_sample: torch.Tensor
+    prev_sample_mean: torch.Tensor
 class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
@@ -86,19 +86,19 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
         self.set_sigmas(num_train_timesteps, sigma_min, sigma_max, sampling_eps)
-    def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor:
+    def scale_model_input(self, sample: torch.Tensor, timestep: Optional[int] = None) -> torch.Tensor:
         """
         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
         current timestep.
         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 The input sample.
             timestep (`int`, *optional*):
                 The current timestep in the diffusion chain.
         Returns:
-            `torch.FloatTensor`:
+            `torch.Tensor`:
                 A scaled input sample.
         """
         return sample
@@ -159,9 +159,9 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
     def step_pred(
         self,
-        model_output: torch.FloatTensor,
+        model_output: torch.Tensor,
         timestep: int,
-        sample: torch.FloatTensor,
+        sample: torch.Tensor,
         generator: Optional[torch.Generator] = None,
         return_dict: bool = True,
     ) -> Union[SdeVeOutput, Tuple]:
@@ -170,11 +170,11 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
         process from the learned model outputs (most often the predicted noise).
         Args:
-            model_output (`torch.FloatTensor`):
+            model_output (`torch.Tensor`):
                 The direct output from learned diffusion model.
             timestep (`int`):
                 The current discrete timestep in the diffusion chain.
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 A current instance of a sample created by the diffusion process.
             generator (`torch.Generator`, *optional*):
                 A random number generator.
@@ -227,8 +227,8 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
     def step_correct(
         self,
-        model_output: torch.FloatTensor,
-        sample: torch.FloatTensor,
+        model_output: torch.Tensor,
+        sample: torch.Tensor,
         generator: Optional[torch.Generator] = None,
         return_dict: bool = True,
     ) -> Union[SchedulerOutput, Tuple]:
@@ -237,9 +237,9 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
         making the prediction for the previous timestep.
         Args:
-            model_output (`torch.FloatTensor`):
+            model_output (`torch.Tensor`):
                 The direct output from learned diffusion model.
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 A current instance of a sample created by the diffusion process.
             generator (`torch.Generator`, *optional*):
                 A random number generator.
@@ -282,10 +282,10 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
     def add_noise(
         self,
-        original_samples: torch.FloatTensor,
-        noise: torch.FloatTensor,
-        timesteps: torch.FloatTensor,
-    ) -> torch.FloatTensor:
+        original_samples: torch.Tensor,
+        noise: torch.Tensor,
+        timesteps: torch.Tensor,
+    ) -> torch.Tensor:
         # Make sure sigmas and timesteps have the same device and dtype as original_samples
         timesteps = timesteps.to(original_samples.device)
         sigmas = self.discrete_sigmas.to(original_samples.device)[timesteps]

diffusers/schedulers/scheduling_tcd.py CHANGED Viewed

@@ -37,15 +37,15 @@ class TCDSchedulerOutput(BaseOutput):
     Output class for the scheduler's `step` function output.
     Args:
-        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
             Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
             denoising loop.
-        pred_noised_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        pred_noised_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
             The predicted noised sample `(x_{s})` based on the model output from the current timestep.
     """
-    prev_sample: torch.FloatTensor
-    pred_noised_sample: Optional[torch.FloatTensor] = None
+    prev_sample: torch.Tensor
+    pred_noised_sample: Optional[torch.Tensor] = None
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
@@ -83,7 +83,7 @@ def betas_for_alpha_bar(
             return math.exp(t * -12.0)
     else:
-        raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
+        raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}")
     betas = []
     for i in range(num_diffusion_timesteps):
@@ -94,17 +94,17 @@ def betas_for_alpha_bar(
 # Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
-def rescale_zero_terminal_snr(betas: torch.FloatTensor) -> torch.FloatTensor:
+def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor:
     """
     Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
     Args:
-        betas (`torch.FloatTensor`):
+        betas (`torch.Tensor`):
             the betas that the scheduler is being initialized with.
     Returns:
-        `torch.FloatTensor`: rescaled betas with zero terminal SNR
+        `torch.Tensor`: rescaled betas with zero terminal SNR
     """
     # Convert betas to alphas_bar_sqrt
     alphas = 1.0 - betas
@@ -132,8 +132,8 @@ def rescale_zero_terminal_snr(betas: torch.FloatTensor) -> torch.FloatTensor:
 class TCDScheduler(SchedulerMixin, ConfigMixin):
     """
-    `TCDScheduler` incorporates the `Strategic Stochastic Sampling` introduced by the paper `Trajectory Consistency Distillation`,
-    extending the original Multistep Consistency Sampling to enable unrestricted trajectory traversal.
+    `TCDScheduler` incorporates the `Strategic Stochastic Sampling` introduced by the paper `Trajectory Consistency
+    Distillation`, extending the original Multistep Consistency Sampling to enable unrestricted trajectory traversal.
     This code is based on the official repo of TCD(https://github.com/jabir-zheng/TCD).
@@ -225,7 +225,7 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)
         else:
-            raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
+            raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}")
         # Rescale for zero SNR
         if rescale_betas_zero_snr:
@@ -297,18 +297,19 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
         """
         self._begin_index = begin_index
-    def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor:
+    def scale_model_input(self, sample: torch.Tensor, timestep: Optional[int] = None) -> torch.Tensor:
         """
         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
         current timestep.
         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 The input sample.
             timestep (`int`, *optional*):
                 The current timestep in the diffusion chain.
         Returns:
-            `torch.FloatTensor`:
+            `torch.Tensor`:
                 A scaled input sample.
         """
         return sample
@@ -325,7 +326,7 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
         return variance
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
-    def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
+    def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
         """
         "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
         prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
@@ -364,7 +365,7 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
         device: Union[str, torch.device] = None,
         original_inference_steps: Optional[int] = None,
         timesteps: Optional[List[int]] = None,
-        strength: int = 1.0,
+        strength: float = 1.0,
     ):
         """
         Sets the discrete timesteps used for the diffusion chain (to be run before inference).
@@ -384,6 +385,8 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
                 Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
                 timestep spacing strategy of equal spacing between timesteps on the training/distillation timestep
                 schedule is used. If `timesteps` is passed, `num_inference_steps` must be `None`.
+            strength (`float`, *optional*, defaults to 1.0):
+                Used to determine the number of timesteps used for inference when using img2img, inpaint, etc.
         """
         # 0. Check inputs
         if num_inference_steps is None and timesteps is None:
@@ -521,9 +524,9 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
     def step(
         self,
-        model_output: torch.FloatTensor,
+        model_output: torch.Tensor,
         timestep: int,
-        sample: torch.FloatTensor,
+        sample: torch.Tensor,
         eta: float = 0.3,
         generator: Optional[torch.Generator] = None,
         return_dict: bool = True,
@@ -533,15 +536,16 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
         process from the learned model outputs (most often the predicted noise).
         Args:
-            model_output (`torch.FloatTensor`):
+            model_output (`torch.Tensor`):
                 The direct output from learned diffusion model.
             timestep (`int`):
                 The current discrete timestep in the diffusion chain.
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 A current instance of a sample created by the diffusion process.
             eta (`float`):
-                A stochastic parameter (referred to as `gamma` in the paper) used to control the stochasticity in every step.
-                When eta = 0, it represents deterministic sampling, whereas eta = 1 indicates full stochastic sampling.
+                A stochastic parameter (referred to as `gamma` in the paper) used to control the stochasticity in every
+                step. When eta = 0, it represents deterministic sampling, whereas eta = 1 indicates full stochastic
+                sampling.
             generator (`torch.Generator`, *optional*):
                 A random number generator.
             return_dict (`bool`, *optional*, defaults to `True`):
@@ -624,14 +628,18 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
         return TCDSchedulerOutput(prev_sample=prev_sample, pred_noised_sample=pred_noised_sample)
+    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
     def add_noise(
         self,
-        original_samples: torch.FloatTensor,
-        noise: torch.FloatTensor,
+        original_samples: torch.Tensor,
+        noise: torch.Tensor,
         timesteps: torch.IntTensor,
-    ) -> torch.FloatTensor:
+    ) -> torch.Tensor:
         # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
-        alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
+        # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
+        # for the subsequent add_noise calls
+        self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
+        alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
         timesteps = timesteps.to(original_samples.device)
         sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
@@ -647,11 +655,11 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
         noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
         return noisy_samples
-    def get_velocity(
-        self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
-    ) -> torch.FloatTensor:
+    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
+    def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
         # Make sure alphas_cumprod and timestep have same device and dtype as sample
-        alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
+        self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
+        alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
         timesteps = timesteps.to(sample.device)
         sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
@@ -670,6 +678,7 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
     def __len__(self):
         return self.config.num_train_timesteps
+    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep
     def previous_timestep(self, timestep):
         if self.custom_timesteps:
             index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0]

diffusers/schedulers/scheduling_unclip.py CHANGED Viewed

@@ -32,16 +32,16 @@ class UnCLIPSchedulerOutput(BaseOutput):
     Output class for the scheduler's `step` function output.
     Args:
-        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
             Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
             denoising loop.
-        pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
             The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
             `pred_original_sample` can be used to preview progress or for guidance.
     """
-    prev_sample: torch.FloatTensor
-    pred_original_sample: Optional[torch.FloatTensor] = None
+    prev_sample: torch.Tensor
+    pred_original_sample: Optional[torch.Tensor] = None
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
@@ -79,7 +79,7 @@ def betas_for_alpha_bar(
             return math.exp(t * -12.0)
     else:
-        raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
+        raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}")
     betas = []
     for i in range(num_diffusion_timesteps):
@@ -146,17 +146,17 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
         self.variance_type = variance_type
-    def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor:
+    def scale_model_input(self, sample: torch.Tensor, timestep: Optional[int] = None) -> torch.Tensor:
         """
         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
         current timestep.
         Args:
-            sample (`torch.FloatTensor`): input sample
+            sample (`torch.Tensor`): input sample
             timestep (`int`, optional): current timestep
         Returns:
-            `torch.FloatTensor`: scaled input sample
+            `torch.Tensor`: scaled input sample
         """
         return sample
@@ -215,9 +215,9 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
     def step(
         self,
-        model_output: torch.FloatTensor,
+        model_output: torch.Tensor,
         timestep: int,
-        sample: torch.FloatTensor,
+        sample: torch.Tensor,
         prev_timestep: Optional[int] = None,
         generator=None,
         return_dict: bool = True,
@@ -227,9 +227,9 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
         process from the learned model outputs (most often the predicted noise).
         Args:
-            model_output (`torch.FloatTensor`): direct output from learned diffusion model.
+            model_output (`torch.Tensor`): direct output from learned diffusion model.
             timestep (`int`): current discrete timestep in the diffusion chain.
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 current instance of sample being created by diffusion process.
             prev_timestep (`int`, *optional*): The previous timestep to predict the previous sample at.
                 Used to dynamically compute beta. If not given, `t-1` is used and the pre-computed beta is used.
@@ -327,10 +327,10 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
     def add_noise(
         self,
-        original_samples: torch.FloatTensor,
-        noise: torch.FloatTensor,
+        original_samples: torch.Tensor,
+        noise: torch.Tensor,
         timesteps: torch.IntTensor,
-    ) -> torch.FloatTensor:
+    ) -> torch.Tensor:
         # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
         # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
         # for the subsequent add_noise calls

diffusers 0.27.2__py3-none-any.whl → 0.28.0__py3-none-any.whl

diffusers 0.27.2py3-none-any.whl → 0.28.0py3-none-any.whl