PyPI - diffusers - Versions diffs - 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl - Mend

diffusers 0.24.0py3-none-any.whl → 0.25.0py3-none-any.whl

Files changed (174) hide show

diffusers/schedulers/scheduling_amused.py ADDED Viewed

@@ -0,0 +1,162 @@
+import math
+from dataclasses import dataclass
+from typing import List, Optional, Tuple, Union
+import torch
+from ..configuration_utils import ConfigMixin, register_to_config
+from ..utils import BaseOutput
+from .scheduling_utils import SchedulerMixin
+def gumbel_noise(t, generator=None):
+    device = generator.device if generator is not None else t.device
+    noise = torch.zeros_like(t, device=device).uniform_(0, 1, generator=generator).to(t.device)
+    return -torch.log((-torch.log(noise.clamp(1e-20))).clamp(1e-20))
+def mask_by_random_topk(mask_len, probs, temperature=1.0, generator=None):
+    confidence = torch.log(probs.clamp(1e-20)) + temperature * gumbel_noise(probs, generator=generator)
+    sorted_confidence = torch.sort(confidence, dim=-1).values
+    cut_off = torch.gather(sorted_confidence, 1, mask_len.long())
+    masking = confidence < cut_off
+    return masking
+@dataclass
+class AmusedSchedulerOutput(BaseOutput):
+    """
+    Output class for the scheduler's `step` function output.
+    Args:
+        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
+            denoising loop.
+        pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+            The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
+            `pred_original_sample` can be used to preview progress or for guidance.
+    """
+    prev_sample: torch.FloatTensor
+    pred_original_sample: torch.FloatTensor = None
+class AmusedScheduler(SchedulerMixin, ConfigMixin):
+    order = 1
+    temperatures: torch.Tensor
+    @register_to_config
+    def __init__(
+        self,
+        mask_token_id: int,
+        masking_schedule: str = "cosine",
+    ):
+        self.temperatures = None
+        self.timesteps = None
+    def set_timesteps(
+        self,
+        num_inference_steps: int,
+        temperature: Union[int, Tuple[int, int], List[int]] = (2, 0),
+        device: Union[str, torch.device] = None,
+    ):
+        self.timesteps = torch.arange(num_inference_steps, device=device).flip(0)
+        if isinstance(temperature, (tuple, list)):
+            self.temperatures = torch.linspace(temperature[0], temperature[1], num_inference_steps, device=device)
+        else:
+            self.temperatures = torch.linspace(temperature, 0.01, num_inference_steps, device=device)
+    def step(
+        self,
+        model_output: torch.FloatTensor,
+        timestep: torch.long,
+        sample: torch.LongTensor,
+        starting_mask_ratio: int = 1,
+        generator: Optional[torch.Generator] = None,
+        return_dict: bool = True,
+    ) -> Union[AmusedSchedulerOutput, Tuple]:
+        two_dim_input = sample.ndim == 3 and model_output.ndim == 4
+        if two_dim_input:
+            batch_size, codebook_size, height, width = model_output.shape
+            sample = sample.reshape(batch_size, height * width)
+            model_output = model_output.reshape(batch_size, codebook_size, height * width).permute(0, 2, 1)
+        unknown_map = sample == self.config.mask_token_id
+        probs = model_output.softmax(dim=-1)
+        device = probs.device
+        probs_ = probs.to(generator.device) if generator is not None else probs  # handles when generator is on CPU
+        if probs_.device.type == "cpu" and probs_.dtype != torch.float32:
+            probs_ = probs_.float()  # multinomial is not implemented for cpu half precision
+        probs_ = probs_.reshape(-1, probs.size(-1))
+        pred_original_sample = torch.multinomial(probs_, 1, generator=generator).to(device=device)
+        pred_original_sample = pred_original_sample[:, 0].view(*probs.shape[:-1])
+        pred_original_sample = torch.where(unknown_map, pred_original_sample, sample)
+        if timestep == 0:
+            prev_sample = pred_original_sample
+        else:
+            seq_len = sample.shape[1]
+            step_idx = (self.timesteps == timestep).nonzero()
+            ratio = (step_idx + 1) / len(self.timesteps)
+            if self.config.masking_schedule == "cosine":
+                mask_ratio = torch.cos(ratio * math.pi / 2)
+            elif self.config.masking_schedule == "linear":
+                mask_ratio = 1 - ratio
+            else:
+                raise ValueError(f"unknown masking schedule {self.config.masking_schedule}")
+            mask_ratio = starting_mask_ratio * mask_ratio
+            mask_len = (seq_len * mask_ratio).floor()
+            # do not mask more than amount previously masked
+            mask_len = torch.min(unknown_map.sum(dim=-1, keepdim=True) - 1, mask_len)
+            # mask at least one
+            mask_len = torch.max(torch.tensor([1], device=model_output.device), mask_len)
+            selected_probs = torch.gather(probs, -1, pred_original_sample[:, :, None])[:, :, 0]
+            # Ignores the tokens given in the input by overwriting their confidence.
+            selected_probs = torch.where(unknown_map, selected_probs, torch.finfo(selected_probs.dtype).max)
+            masking = mask_by_random_topk(mask_len, selected_probs, self.temperatures[step_idx], generator)
+            # Masks tokens with lower confidence.
+            prev_sample = torch.where(masking, self.config.mask_token_id, pred_original_sample)
+        if two_dim_input:
+            prev_sample = prev_sample.reshape(batch_size, height, width)
+            pred_original_sample = pred_original_sample.reshape(batch_size, height, width)
+        if not return_dict:
+            return (prev_sample, pred_original_sample)
+        return AmusedSchedulerOutput(prev_sample, pred_original_sample)
+    def add_noise(self, sample, timesteps, generator=None):
+        step_idx = (self.timesteps == timesteps).nonzero()
+        ratio = (step_idx + 1) / len(self.timesteps)
+        if self.config.masking_schedule == "cosine":
+            mask_ratio = torch.cos(ratio * math.pi / 2)
+        elif self.config.masking_schedule == "linear":
+            mask_ratio = 1 - ratio
+        else:
+            raise ValueError(f"unknown masking schedule {self.config.masking_schedule}")
+        mask_indices = (
+            torch.rand(
+                sample.shape, device=generator.device if generator is not None else sample.device, generator=generator
+            ).to(sample.device)
+            < mask_ratio
+        )
+        masked_sample = sample.clone()
+        masked_sample[mask_indices] = self.config.mask_token_id
+        return masked_sample

diffusers/schedulers/scheduling_consistency_models.py CHANGED Viewed

@@ -98,6 +98,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
         self.custom_timesteps = False
         self.is_scale_input_called = False
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     def index_for_timestep(self, timestep, schedule_timesteps=None):
         if schedule_timesteps is None:
@@ -230,6 +231,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
             self.timesteps = torch.from_numpy(timesteps).to(device=device)
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     # Modified _convert_to_karras implementation that takes in ramp as argument
     def _convert_to_karras(self, ramp):

diffusers/schedulers/scheduling_ddim_inverse.py CHANGED Viewed

@@ -293,9 +293,6 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
         model_output: torch.FloatTensor,
         timestep: int,
         sample: torch.FloatTensor,
-        eta: float = 0.0,
-        use_clipped_model_output: bool = False,
-        variance_noise: Optional[torch.FloatTensor] = None,
         return_dict: bool = True,
     ) -> Union[DDIMSchedulerOutput, Tuple]:
         """
@@ -332,7 +329,7 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
         # 1. get previous step value (=t+1)
         prev_timestep = timestep
         timestep = min(
-            timestep - self.config.num_train_timesteps // self.num_inference_steps, self.num_train_timesteps - 1
+            timestep - self.config.num_train_timesteps // self.num_inference_steps, self.config.num_train_timesteps - 1
         )
         # 2. compute alphas, betas

diffusers/schedulers/scheduling_ddpm.py CHANGED Viewed

@@ -89,6 +89,43 @@ def betas_for_alpha_bar(
     return torch.tensor(betas, dtype=torch.float32)
+# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
+def rescale_zero_terminal_snr(betas):
+    """
+    Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
+    Args:
+        betas (`torch.FloatTensor`):
+            the betas that the scheduler is being initialized with.
+    Returns:
+        `torch.FloatTensor`: rescaled betas with zero terminal SNR
+    """
+    # Convert betas to alphas_bar_sqrt
+    alphas = 1.0 - betas
+    alphas_cumprod = torch.cumprod(alphas, dim=0)
+    alphas_bar_sqrt = alphas_cumprod.sqrt()
+    # Store old values.
+    alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+    alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+    # Shift so the last timestep is zero.
+    alphas_bar_sqrt -= alphas_bar_sqrt_T
+    # Scale so the first timestep is back to the old value.
+    alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
+    # Convert alphas_bar_sqrt to betas
+    alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
+    alphas = alphas_bar[1:] / alphas_bar[:-1]  # Revert cumprod
+    alphas = torch.cat([alphas_bar[0:1], alphas])
+    betas = 1 - alphas
+    return betas
 class DDPMScheduler(SchedulerMixin, ConfigMixin):
     """
     `DDPMScheduler` explores the connections between denoising score matching and Langevin dynamics sampling.
@@ -131,6 +168,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
             An offset added to the inference steps. You can use a combination of `offset=1` and
             `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
             Diffusion.
+        rescale_betas_zero_snr (`bool`, defaults to `False`):
+            Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
+            dark samples instead of limiting it to samples with medium brightness. Loosely related to
+            [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
     """
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -153,6 +194,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
         sample_max_value: float = 1.0,
         timestep_spacing: str = "leading",
         steps_offset: int = 0,
+        rescale_betas_zero_snr: int = False,
     ):
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -171,6 +213,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
         else:
             raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
+        # Rescale for zero SNR
+        if rescale_betas_zero_snr:
+            self.betas = rescale_zero_terminal_snr(self.betas)
         self.alphas = 1.0 - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
         self.one = torch.tensor(1.0)

diffusers/schedulers/scheduling_ddpm_parallel.py CHANGED Viewed

@@ -91,6 +91,43 @@ def betas_for_alpha_bar(
     return torch.tensor(betas, dtype=torch.float32)
+# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
+def rescale_zero_terminal_snr(betas):
+    """
+    Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
+    Args:
+        betas (`torch.FloatTensor`):
+            the betas that the scheduler is being initialized with.
+    Returns:
+        `torch.FloatTensor`: rescaled betas with zero terminal SNR
+    """
+    # Convert betas to alphas_bar_sqrt
+    alphas = 1.0 - betas
+    alphas_cumprod = torch.cumprod(alphas, dim=0)
+    alphas_bar_sqrt = alphas_cumprod.sqrt()
+    # Store old values.
+    alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+    alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+    # Shift so the last timestep is zero.
+    alphas_bar_sqrt -= alphas_bar_sqrt_T
+    # Scale so the first timestep is back to the old value.
+    alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
+    # Convert alphas_bar_sqrt to betas
+    alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
+    alphas = alphas_bar[1:] / alphas_bar[:-1]  # Revert cumprod
+    alphas = torch.cat([alphas_bar[0:1], alphas])
+    betas = 1 - alphas
+    return betas
 class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
     """
     Denoising diffusion probabilistic models (DDPMs) explores the connections between denoising score matching and
@@ -139,6 +176,10 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
             an offset added to the inference steps. You can use a combination of `offset=1` and
             `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
             stable diffusion.
+        rescale_betas_zero_snr (`bool`, defaults to `False`):
+            Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
+            dark samples instead of limiting it to samples with medium brightness. Loosely related to
+            [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
     """
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -163,6 +204,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
         sample_max_value: float = 1.0,
         timestep_spacing: str = "leading",
         steps_offset: int = 0,
+        rescale_betas_zero_snr: int = False,
     ):
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -181,6 +223,10 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
         else:
             raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
+        # Rescale for zero SNR
+        if rescale_betas_zero_snr:
+            self.betas = rescale_zero_terminal_snr(self.betas)
         self.alphas = 1.0 - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
         self.one = torch.tensor(1.0)

diffusers/schedulers/scheduling_deis_multistep.py CHANGED Viewed

@@ -162,6 +162,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         self.alpha_t = torch.sqrt(self.alphas_cumprod)
         self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
         self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
+        self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5
         # standard deviation of the initial noise distribution
         self.init_noise_sigma = 1.0
@@ -186,6 +187,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         self.model_outputs = [None] * solver_order
         self.lower_order_nums = 0
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     @property
     def step_index(self):
@@ -253,6 +255,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         # add an index counter for schedulers that allow duplicated timesteps
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
     def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
@@ -733,7 +736,16 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
             schedule_timesteps = self.timesteps.to(original_samples.device)
             timesteps = timesteps.to(original_samples.device)
-        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+        step_indices = []
+        for timestep in timesteps:
+            index_candidates = (schedule_timesteps == timestep).nonzero()
+            if len(index_candidates) == 0:
+                step_index = len(schedule_timesteps) - 1
+            elif len(index_candidates) > 1:
+                step_index = index_candidates[1].item()
+            else:
+                step_index = index_candidates[0].item()
+            step_indices.append(step_index)
         sigma = sigmas[step_indices].flatten()
         while len(sigma.shape) < len(original_samples.shape):

diffusers/schedulers/scheduling_dpmsolver_multistep.py CHANGED Viewed

@@ -189,6 +189,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         self.alpha_t = torch.sqrt(self.alphas_cumprod)
         self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
         self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
+        self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5
         # standard deviation of the initial noise distribution
         self.init_noise_sigma = 1.0
@@ -213,6 +214,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         self.model_outputs = [None] * solver_order
         self.lower_order_nums = 0
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     @property
     def step_index(self):
@@ -289,6 +291,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         # add an index counter for schedulers that allow duplicated timesteps
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
     def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
@@ -895,7 +898,16 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
             schedule_timesteps = self.timesteps.to(original_samples.device)
             timesteps = timesteps.to(original_samples.device)
-        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+        step_indices = []
+        for timestep in timesteps:
+            index_candidates = (schedule_timesteps == timestep).nonzero()
+            if len(index_candidates) == 0:
+                step_index = len(schedule_timesteps) - 1
+            elif len(index_candidates) > 1:
+                step_index = index_candidates[1].item()
+            else:
+                step_index = index_candidates[0].item()
+            step_indices.append(step_index)
         sigma = sigmas[step_indices].flatten()
         while len(sigma.shape) < len(original_samples.shape):

diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py CHANGED Viewed

@@ -184,6 +184,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
         self.alpha_t = torch.sqrt(self.alphas_cumprod)
         self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
         self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
+        self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5
         # standard deviation of the initial noise distribution
         self.init_noise_sigma = 1.0
@@ -208,6 +209,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
         self.model_outputs = [None] * solver_order
         self.lower_order_nums = 0
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
         self.use_karras_sigmas = use_karras_sigmas
     @property
@@ -288,6 +290,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
         # add an index counter for schedulers that allow duplicated timesteps
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
     def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
@@ -890,7 +893,16 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
             schedule_timesteps = self.timesteps.to(original_samples.device)
             timesteps = timesteps.to(original_samples.device)
-        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+        step_indices = []
+        for timestep in timesteps:
+            index_candidates = (schedule_timesteps == timestep).nonzero()
+            if len(index_candidates) == 0:
+                step_index = len(schedule_timesteps) - 1
+            elif len(index_candidates) > 1:
+                step_index = index_candidates[1].item()
+            else:
+                step_index = index_candidates[0].item()
+            step_indices.append(step_index)
         sigma = sigmas[step_indices].flatten()
         while len(sigma.shape) < len(original_samples.shape):

diffusers/schedulers/scheduling_dpmsolver_sde.py CHANGED Viewed

@@ -198,6 +198,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
         self.noise_sampler = None
         self.noise_sampler_seed = noise_sampler_seed
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     # Copied from diffusers.schedulers.scheduling_heun_discrete.HeunDiscreteScheduler.index_for_timestep
     def index_for_timestep(self, timestep, schedule_timesteps=None):
@@ -347,6 +348,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
         self.mid_point_sigma = None
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
         self.noise_sampler = None
         # for exp beta schedules, such as the one for `pipeline_shap_e.py`

diffusers/schedulers/scheduling_dpmsolver_singlestep.py CHANGED Viewed

@@ -172,6 +172,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
         self.alpha_t = torch.sqrt(self.alphas_cumprod)
         self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
         self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
+        self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5
         # standard deviation of the initial noise distribution
         self.init_noise_sigma = 1.0
@@ -196,6 +197,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
         self.sample = None
         self.order_list = self.get_order_list(num_train_timesteps)
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     def get_order_list(self, num_inference_steps: int) -> List[int]:
         """
@@ -287,6 +289,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
         # add an index counter for schedulers that allow duplicated timesteps
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
     def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
@@ -896,7 +899,16 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
             schedule_timesteps = self.timesteps.to(original_samples.device)
             timesteps = timesteps.to(original_samples.device)
-        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+        step_indices = []
+        for timestep in timesteps:
+            index_candidates = (schedule_timesteps == timestep).nonzero()
+            if len(index_candidates) == 0:
+                step_index = len(schedule_timesteps) - 1
+            elif len(index_candidates) > 1:
+                step_index = index_candidates[1].item()
+            else:
+                step_index = index_candidates[0].item()
+            step_indices.append(step_index)
         sigma = sigmas[step_indices].flatten()
         while len(sigma.shape) < len(original_samples.shape):

diffusers/schedulers/scheduling_euler_ancestral_discrete.py CHANGED Viewed

@@ -92,6 +92,43 @@ def betas_for_alpha_bar(
     return torch.tensor(betas, dtype=torch.float32)
+# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
+def rescale_zero_terminal_snr(betas):
+    """
+    Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
+    Args:
+        betas (`torch.FloatTensor`):
+            the betas that the scheduler is being initialized with.
+    Returns:
+        `torch.FloatTensor`: rescaled betas with zero terminal SNR
+    """
+    # Convert betas to alphas_bar_sqrt
+    alphas = 1.0 - betas
+    alphas_cumprod = torch.cumprod(alphas, dim=0)
+    alphas_bar_sqrt = alphas_cumprod.sqrt()
+    # Store old values.
+    alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+    alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+    # Shift so the last timestep is zero.
+    alphas_bar_sqrt -= alphas_bar_sqrt_T
+    # Scale so the first timestep is back to the old value.
+    alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
+    # Convert alphas_bar_sqrt to betas
+    alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
+    alphas = alphas_bar[1:] / alphas_bar[:-1]  # Revert cumprod
+    alphas = torch.cat([alphas_bar[0:1], alphas])
+    betas = 1 - alphas
+    return betas
 class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
     """
     Ancestral sampling with Euler method steps.
@@ -122,6 +159,10 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
             An offset added to the inference steps. You can use a combination of `offset=1` and
             `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
             Diffusion.
+        rescale_betas_zero_snr (`bool`, defaults to `False`):
+            Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
+            dark samples instead of limiting it to samples with medium brightness. Loosely related to
+            [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
     """
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -138,6 +179,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         prediction_type: str = "epsilon",
         timestep_spacing: str = "linspace",
         steps_offset: int = 0,
+        rescale_betas_zero_snr: bool = False,
     ):
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -152,9 +194,17 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         else:
             raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
+        if rescale_betas_zero_snr:
+            self.betas = rescale_zero_terminal_snr(self.betas)
         self.alphas = 1.0 - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
+        if rescale_betas_zero_snr:
+            # Close to 0 without being 0 so first sigma is not inf
+            # FP16 smallest positive subnormal works well here
+            self.alphas_cumprod[-1] = 2**-24
         sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
         sigmas = np.concatenate([sigmas[::-1], [0.0]]).astype(np.float32)
         self.sigmas = torch.from_numpy(sigmas)
@@ -166,6 +216,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         self.is_scale_input_called = False
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     @property
     def init_noise_sigma(self):
@@ -249,6 +300,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         self.timesteps = torch.from_numpy(timesteps).to(device=device)
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
     def _init_step_index(self, timestep):
@@ -325,6 +377,9 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         sigma = self.sigmas[self.step_index]
+        # Upcast to avoid precision issues when computing prev_sample
+        sample = sample.to(torch.float32)
         # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
         if self.config.prediction_type == "epsilon":
             pred_original_sample = sample - sigma * model_output
@@ -355,6 +410,9 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         prev_sample = prev_sample + noise * sigma_up
+        # Cast sample back to model compatible dtype
+        prev_sample = prev_sample.to(model_output.dtype)
         # upon completion increase step index by one
         self._step_index += 1

diffusers 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

diffusers 0.24.0py3-none-any.whl → 0.25.0py3-none-any.whl