PyPI - diffusers - Versions diffs - 0.17.1__py3-none-any.whl → 0.18.2__py3-none-any.whl - Mend

diffusers 0.17.1py3-none-any.whl → 0.18.2py3-none-any.whl

Files changed (120) hide show

diffusers/__init__.py +26 -1
diffusers/configuration_utils.py +34 -29
diffusers/dependency_versions_table.py +4 -0
diffusers/image_processor.py +125 -12
diffusers/loaders.py +169 -203
diffusers/models/attention.py +24 -1
diffusers/models/attention_flax.py +10 -5
diffusers/models/attention_processor.py +3 -0
diffusers/models/autoencoder_kl.py +114 -33
diffusers/models/controlnet.py +131 -14
diffusers/models/controlnet_flax.py +37 -26
diffusers/models/cross_attention.py +17 -17
diffusers/models/embeddings.py +67 -0
diffusers/models/modeling_flax_utils.py +64 -56
diffusers/models/modeling_utils.py +193 -104
diffusers/models/prior_transformer.py +207 -37
diffusers/models/resnet.py +26 -26
diffusers/models/transformer_2d.py +36 -41
diffusers/models/transformer_temporal.py +24 -21
diffusers/models/unet_1d.py +31 -25
diffusers/models/unet_2d.py +43 -30
diffusers/models/unet_2d_blocks.py +210 -89
diffusers/models/unet_2d_blocks_flax.py +12 -12
diffusers/models/unet_2d_condition.py +172 -64
diffusers/models/unet_2d_condition_flax.py +38 -24
diffusers/models/unet_3d_blocks.py +34 -31
diffusers/models/unet_3d_condition.py +101 -34
diffusers/models/vae.py +5 -5
diffusers/models/vae_flax.py +37 -34
diffusers/models/vq_model.py +23 -14
diffusers/pipelines/__init__.py +24 -1
diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +1 -1
diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -3
diffusers/pipelines/consistency_models/__init__.py +1 -0
diffusers/pipelines/consistency_models/pipeline_consistency_models.py +337 -0
diffusers/pipelines/controlnet/multicontrolnet.py +120 -1
diffusers/pipelines/controlnet/pipeline_controlnet.py +59 -17
diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +60 -15
diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +60 -17
diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
diffusers/pipelines/kandinsky/__init__.py +1 -1
diffusers/pipelines/kandinsky/pipeline_kandinsky.py +4 -6
diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +1 -0
diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +1 -0
diffusers/pipelines/kandinsky2_2/__init__.py +7 -0
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +317 -0
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +372 -0
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +434 -0
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +398 -0
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +531 -0
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +541 -0
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +605 -0
diffusers/pipelines/pipeline_flax_utils.py +2 -2
diffusers/pipelines/pipeline_utils.py +124 -146
diffusers/pipelines/shap_e/__init__.py +27 -0
diffusers/pipelines/shap_e/camera.py +147 -0
diffusers/pipelines/shap_e/pipeline_shap_e.py +390 -0
diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +349 -0
diffusers/pipelines/shap_e/renderer.py +709 -0
diffusers/pipelines/stable_diffusion/__init__.py +2 -0
diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +261 -66
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +3 -3
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -3
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +1 -1
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +719 -0
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -1
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +832 -0
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +17 -7
diffusers/pipelines/stable_diffusion_xl/__init__.py +26 -0
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +823 -0
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +896 -0
diffusers/pipelines/stable_diffusion_xl/watermark.py +31 -0
diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -1
diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -1
diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +771 -0
diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +92 -6
diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +209 -91
diffusers/schedulers/__init__.py +3 -0
diffusers/schedulers/scheduling_consistency_models.py +380 -0
diffusers/schedulers/scheduling_ddim.py +28 -6
diffusers/schedulers/scheduling_ddim_inverse.py +19 -4
diffusers/schedulers/scheduling_ddim_parallel.py +642 -0
diffusers/schedulers/scheduling_ddpm.py +53 -7
diffusers/schedulers/scheduling_ddpm_parallel.py +604 -0
diffusers/schedulers/scheduling_deis_multistep.py +66 -11
diffusers/schedulers/scheduling_dpmsolver_multistep.py +55 -13
diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +19 -4
diffusers/schedulers/scheduling_dpmsolver_sde.py +73 -11
diffusers/schedulers/scheduling_dpmsolver_singlestep.py +23 -7
diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -9
diffusers/schedulers/scheduling_euler_discrete.py +58 -8
diffusers/schedulers/scheduling_heun_discrete.py +89 -14
diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +73 -11
diffusers/schedulers/scheduling_k_dpm_2_discrete.py +73 -11
diffusers/schedulers/scheduling_lms_discrete.py +57 -8
diffusers/schedulers/scheduling_pndm.py +46 -10
diffusers/schedulers/scheduling_repaint.py +19 -4
diffusers/schedulers/scheduling_sde_ve.py +5 -1
diffusers/schedulers/scheduling_unclip.py +43 -4
diffusers/schedulers/scheduling_unipc_multistep.py +48 -7
diffusers/training_utils.py +1 -1
diffusers/utils/__init__.py +2 -1
diffusers/utils/dummy_pt_objects.py +60 -0
diffusers/utils/dummy_torch_and_transformers_and_invisible_watermark_objects.py +32 -0
diffusers/utils/dummy_torch_and_transformers_objects.py +180 -0
diffusers/utils/hub_utils.py +1 -1
diffusers/utils/import_utils.py +20 -3
diffusers/utils/logging.py +15 -18
diffusers/utils/outputs.py +3 -3
diffusers/utils/testing_utils.py +15 -0
{diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/METADATA +4 -2
{diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/RECORD +120 -94
{diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
{diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
{diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
{diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0

diffusers/schedulers/scheduling_euler_discrete.py CHANGED Viewed

@@ -47,7 +47,11 @@ class EulerDiscreteSchedulerOutput(BaseOutput):
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
-def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
+def betas_for_alpha_bar(
+    num_diffusion_timesteps,
+    max_beta=0.999,
+    alpha_transform_type="cosine",
+):
     """
     Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
     (1-beta) over time from t = [0,1].
@@ -60,19 +64,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
         num_diffusion_timesteps (`int`): the number of betas to produce.
         max_beta (`float`): the maximum beta to use; use values lower than 1 to
                      prevent singularities.
+        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
+                     Choose from `cosine` or `exp`
     Returns:
         betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
     """
+    if alpha_transform_type == "cosine":
-    def alpha_bar(time_step):
-        return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
+        def alpha_bar_fn(t):
+            return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
+    elif alpha_transform_type == "exp":
+        def alpha_bar_fn(t):
+            return math.exp(t * -12.0)
+    else:
+        raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
     betas = []
     for i in range(num_diffusion_timesteps):
         t1 = i / num_diffusion_timesteps
         t2 = (i + 1) / num_diffusion_timesteps
-        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
+        betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
     return torch.tensor(betas, dtype=torch.float32)
@@ -107,6 +122,13 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
              This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
              noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
              of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
+        timestep_spacing (`str`, default `"linspace"`):
+            The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
+            Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
+        steps_offset (`int`, default `0`):
+            an offset added to the inference steps. You can use a combination of `offset=1` and
+            `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
+            stable diffusion.
     """
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -123,6 +145,8 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
         prediction_type: str = "epsilon",
         interpolation_type: str = "linear",
         use_karras_sigmas: Optional[bool] = False,
+        timestep_spacing: str = "linspace",
+        steps_offset: int = 0,
     ):
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -146,9 +170,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
         sigmas = np.concatenate([sigmas[::-1], [0.0]]).astype(np.float32)
         self.sigmas = torch.from_numpy(sigmas)
-        # standard deviation of the initial noise distribution
-        self.init_noise_sigma = self.sigmas.max()
         # setable values
         self.num_inference_steps = None
         timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=float)[::-1].copy()
@@ -156,6 +177,14 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
         self.is_scale_input_called = False
         self.use_karras_sigmas = use_karras_sigmas
+    @property
+    def init_noise_sigma(self):
+        # standard deviation of the initial noise distribution
+        if self.config.timestep_spacing in ["linspace", "trailing"]:
+            return self.sigmas.max()
+        return (self.sigmas.max() ** 2 + 1) ** 0.5
     def scale_model_input(
         self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor]
     ) -> torch.FloatTensor:
@@ -191,7 +220,28 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
         """
         self.num_inference_steps = num_inference_steps
-        timesteps = np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
+        # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
+        if self.config.timestep_spacing == "linspace":
+            timesteps = np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps, dtype=float)[
+                ::-1
+            ].copy()
+        elif self.config.timestep_spacing == "leading":
+            step_ratio = self.config.num_train_timesteps // self.num_inference_steps
+            # creates integer timesteps by multiplying by ratio
+            # casting to int to avoid issues when num_inference_step is power of 3
+            timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(float)
+            timesteps += self.config.steps_offset
+        elif self.config.timestep_spacing == "trailing":
+            step_ratio = self.config.num_train_timesteps / self.num_inference_steps
+            # creates integer timesteps by multiplying by ratio
+            # casting to int to avoid issues when num_inference_step is power of 3
+            timesteps = (np.arange(self.config.num_train_timesteps, 0, -step_ratio)).round().copy().astype(float)
+            timesteps -= 1
+        else:
+            raise ValueError(
+                f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
+            )
         sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
         log_sigmas = np.log(sigmas)

diffusers/schedulers/scheduling_heun_discrete.py CHANGED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 import math
+from collections import defaultdict
 from typing import List, Optional, Tuple, Union
 import numpy as np
@@ -23,7 +24,11 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
-def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> torch.Tensor:
+def betas_for_alpha_bar(
+    num_diffusion_timesteps,
+    max_beta=0.999,
+    alpha_transform_type="cosine",
+):
     """
     Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
     (1-beta) over time from t = [0,1].
@@ -36,19 +41,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> torch.Tensor
         num_diffusion_timesteps (`int`): the number of betas to produce.
         max_beta (`float`): the maximum beta to use; use values lower than 1 to
                      prevent singularities.
+        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
+                     Choose from `cosine` or `exp`
     Returns:
         betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
     """
+    if alpha_transform_type == "cosine":
-    def alpha_bar(time_step):
-        return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
+        def alpha_bar_fn(t):
+            return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
+    elif alpha_transform_type == "exp":
+        def alpha_bar_fn(t):
+            return math.exp(t * -12.0)
+    else:
+        raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
     betas = []
     for i in range(num_diffusion_timesteps):
         t1 = i / num_diffusion_timesteps
         t2 = (i + 1) / num_diffusion_timesteps
-        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
+        betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
     return torch.tensor(betas, dtype=torch.float32)
@@ -74,10 +90,21 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
             prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
             process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
             https://imagen.research.google/video/paper.pdf).
+        clip_sample (`bool`, default `True`):
+            option to clip predicted sample for numerical stability.
+        clip_sample_range (`float`, default `1.0`):
+            the maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
         use_karras_sigmas (`bool`, *optional*, defaults to `False`):
              This parameter controls whether to use Karras sigmas (Karras et al. (2022) scheme) for step sizes in the
              noise schedule during the sampling process. If True, the sigmas will be determined according to a sequence
              of noise levels {σi} as defined in Equation (5) of the paper https://arxiv.org/pdf/2206.00364.pdf.
+        timestep_spacing (`str`, default `"linspace"`):
+            The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
+            Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
+        steps_offset (`int`, default `0`):
+            an offset added to the inference steps. You can use a combination of `offset=1` and
+            `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
+            stable diffusion.
     """
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -93,6 +120,10 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
         prediction_type: str = "epsilon",
         use_karras_sigmas: Optional[bool] = False,
+        clip_sample: Optional[bool] = False,
+        clip_sample_range: float = 1.0,
+        timestep_spacing: str = "linspace",
+        steps_offset: int = 0,
     ):
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -105,7 +136,9 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
             )
         elif beta_schedule == "squaredcos_cap_v2":
             # Glide cosine schedule
-            self.betas = betas_for_alpha_bar(num_train_timesteps)
+            self.betas = betas_for_alpha_bar(num_train_timesteps, alpha_transform_type="cosine")
+        elif beta_schedule == "exp":
+            self.betas = betas_for_alpha_bar(num_train_timesteps, alpha_transform_type="exp")
         else:
             raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
@@ -122,12 +155,26 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         indices = (schedule_timesteps == timestep).nonzero()
-        if self.state_in_first_order:
-            pos = -1
+        # The sigma index that is taken for the **very** first `step`
+        # is always the second index (or the last index if there is only 1)
+        # This way we can ensure we don't accidentally skip a sigma in
+        # case we start in the middle of the denoising schedule (e.g. for image-to-image)
+        if len(self._index_counter) == 0:
+            pos = 1 if len(indices) > 1 else 0
         else:
-            pos = 0
+            timestep_int = timestep.cpu().item() if torch.is_tensor(timestep) else timestep
+            pos = self._index_counter[timestep_int]
         return indices[pos].item()
+    @property
+    def init_noise_sigma(self):
+        # standard deviation of the initial noise distribution
+        if self.config.timestep_spacing in ["linspace", "trailing"]:
+            return self.sigmas.max()
+        return (self.sigmas.max() ** 2 + 1) ** 0.5
     def scale_model_input(
         self,
         sample: torch.FloatTensor,
@@ -166,13 +213,31 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         num_train_timesteps = num_train_timesteps or self.config.num_train_timesteps
-        timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
+        # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
+        if self.config.timestep_spacing == "linspace":
+            timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
+        elif self.config.timestep_spacing == "leading":
+            step_ratio = num_train_timesteps // self.num_inference_steps
+            # creates integer timesteps by multiplying by ratio
+            # casting to int to avoid issues when num_inference_step is power of 3
+            timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(float)
+            timesteps += self.config.steps_offset
+        elif self.config.timestep_spacing == "trailing":
+            step_ratio = num_train_timesteps / self.num_inference_steps
+            # creates integer timesteps by multiplying by ratio
+            # casting to int to avoid issues when num_inference_step is power of 3
+            timesteps = (np.arange(num_train_timesteps, 0, -step_ratio)).round().copy().astype(float)
+            timesteps -= 1
+        else:
+            raise ValueError(
+                f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
+            )
         sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
         log_sigmas = np.log(sigmas)
         sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
-        if self.use_karras_sigmas:
+        if self.config.use_karras_sigmas:
             sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
@@ -180,9 +245,6 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         sigmas = torch.from_numpy(sigmas).to(device=device)
         self.sigmas = torch.cat([sigmas[:1], sigmas[1:-1].repeat_interleave(2), sigmas[-1:]])
-        # standard deviation of the initial noise distribution
-        self.init_noise_sigma = self.sigmas.max()
         timesteps = torch.from_numpy(timesteps)
         timesteps = torch.cat([timesteps[:1], timesteps[1:].repeat_interleave(2)])
@@ -196,6 +258,10 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         self.prev_derivative = None
         self.dt = None
+        # for exp beta schedules, such as the one for `pipeline_shap_e.py`
+        # we need an index counter
+        self._index_counter = defaultdict(int)
     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
     def _sigma_to_t(self, sigma, log_sigmas):
         # get log sigma
@@ -260,6 +326,10 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
         """
         step_index = self.index_for_timestep(timestep)
+        # advance index counter by 1
+        timestep_int = timestep.cpu().item() if torch.is_tensor(timestep) else timestep
+        self._index_counter[timestep_int] += 1
         if self.state_in_first_order:
             sigma = self.sigmas[step_index]
             sigma_next = self.sigmas[step_index + 1]
@@ -284,12 +354,17 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
                 sample / (sigma_input**2 + 1)
             )
         elif self.config.prediction_type == "sample":
-            raise NotImplementedError("prediction_type not implemented yet: sample")
+            pred_original_sample = model_output
         else:
             raise ValueError(
                 f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`"
             )
+        if self.config.clip_sample:
+            pred_original_sample = pred_original_sample.clamp(
+                -self.config.clip_sample_range, self.config.clip_sample_range
+            )
         if self.state_in_first_order:
             # 2. Convert to an ODE derivative for 1st order
             derivative = (sample - pred_original_sample) / sigma_hat

diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py CHANGED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 import math
+from collections import defaultdict
 from typing import List, Optional, Tuple, Union
 import numpy as np
@@ -24,7 +25,11 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
-def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> torch.Tensor:
+def betas_for_alpha_bar(
+    num_diffusion_timesteps,
+    max_beta=0.999,
+    alpha_transform_type="cosine",
+):
     """
     Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
     (1-beta) over time from t = [0,1].
@@ -37,19 +42,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> torch.Tensor
         num_diffusion_timesteps (`int`): the number of betas to produce.
         max_beta (`float`): the maximum beta to use; use values lower than 1 to
                      prevent singularities.
+        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
+                     Choose from `cosine` or `exp`
     Returns:
         betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
     """
+    if alpha_transform_type == "cosine":
-    def alpha_bar(time_step):
-        return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
+        def alpha_bar_fn(t):
+            return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
+    elif alpha_transform_type == "exp":
+        def alpha_bar_fn(t):
+            return math.exp(t * -12.0)
+    else:
+        raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
     betas = []
     for i in range(num_diffusion_timesteps):
         t1 = i / num_diffusion_timesteps
         t2 = (i + 1) / num_diffusion_timesteps
-        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
+        betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
     return torch.tensor(betas, dtype=torch.float32)
@@ -78,6 +94,13 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
             prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
             process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
             https://imagen.research.google/video/paper.pdf)
+        timestep_spacing (`str`, default `"linspace"`):
+            The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
+            Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
+        steps_offset (`int`, default `0`):
+            an offset added to the inference steps. You can use a combination of `offset=1` and
+            `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
+            stable diffusion.
     """
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -92,6 +115,8 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         beta_schedule: str = "linear",
         trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
         prediction_type: str = "epsilon",
+        timestep_spacing: str = "linspace",
+        steps_offset: int = 0,
     ):
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -121,12 +146,26 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         indices = (schedule_timesteps == timestep).nonzero()
-        if self.state_in_first_order:
-            pos = -1
+        # The sigma index that is taken for the **very** first `step`
+        # is always the second index (or the last index if there is only 1)
+        # This way we can ensure we don't accidentally skip a sigma in
+        # case we start in the middle of the denoising schedule (e.g. for image-to-image)
+        if len(self._index_counter) == 0:
+            pos = 1 if len(indices) > 1 else 0
         else:
-            pos = 0
+            timestep_int = timestep.cpu().item() if torch.is_tensor(timestep) else timestep
+            pos = self._index_counter[timestep_int]
         return indices[pos].item()
+    @property
+    def init_noise_sigma(self):
+        # standard deviation of the initial noise distribution
+        if self.config.timestep_spacing in ["linspace", "trailing"]:
+            return self.sigmas.max()
+        return (self.sigmas.max() ** 2 + 1) ** 0.5
     def scale_model_input(
         self,
         sample: torch.FloatTensor,
@@ -169,7 +208,25 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         num_train_timesteps = num_train_timesteps or self.config.num_train_timesteps
-        timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
+        # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
+        if self.config.timestep_spacing == "linspace":
+            timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
+        elif self.config.timestep_spacing == "leading":
+            step_ratio = num_train_timesteps // self.num_inference_steps
+            # creates integer timesteps by multiplying by ratio
+            # casting to int to avoid issues when num_inference_step is power of 3
+            timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(float)
+            timesteps += self.config.steps_offset
+        elif self.config.timestep_spacing == "trailing":
+            step_ratio = num_train_timesteps / self.num_inference_steps
+            # creates integer timesteps by multiplying by ratio
+            # casting to int to avoid issues when num_inference_step is power of 3
+            timesteps = (np.arange(num_train_timesteps, 0, -step_ratio)).round().copy().astype(float)
+            timesteps -= 1
+        else:
+            raise ValueError(
+                f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
+            )
         sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
         self.log_sigmas = torch.from_numpy(np.log(sigmas)).to(device)
@@ -197,9 +254,6 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         self.sigmas_up = torch.cat([sigmas_up[:1], sigmas_up[1:].repeat_interleave(2), sigmas_up[-1:]])
         self.sigmas_down = torch.cat([sigmas_down[:1], sigmas_down[1:].repeat_interleave(2), sigmas_down[-1:]])
-        # standard deviation of the initial noise distribution
-        self.init_noise_sigma = self.sigmas.max()
         if str(device).startswith("mps"):
             # mps does not support float64
             timesteps = torch.from_numpy(timesteps).to(device, dtype=torch.float32)
@@ -213,6 +267,10 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         self.sample = None
+        # for exp beta schedules, such as the one for `pipeline_shap_e.py`
+        # we need an index counter
+        self._index_counter = defaultdict(int)
     def sigma_to_t(self, sigma):
         # get log sigma
         log_sigma = sigma.log()
@@ -263,6 +321,10 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         """
         step_index = self.index_for_timestep(timestep)
+        # advance index counter by 1
+        timestep_int = timestep.cpu().item() if torch.is_tensor(timestep) else timestep
+        self._index_counter[timestep_int] += 1
         if self.state_in_first_order:
             sigma = self.sigmas[step_index]
             sigma_interpol = self.sigmas_interpol[step_index]

diffusers/schedulers/scheduling_k_dpm_2_discrete.py CHANGED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 import math
+from collections import defaultdict
 from typing import List, Optional, Tuple, Union
 import numpy as np
@@ -23,7 +24,11 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
-def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> torch.Tensor:
+def betas_for_alpha_bar(
+    num_diffusion_timesteps,
+    max_beta=0.999,
+    alpha_transform_type="cosine",
+):
     """
     Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
     (1-beta) over time from t = [0,1].
@@ -36,19 +41,30 @@ def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> torch.Tensor
         num_diffusion_timesteps (`int`): the number of betas to produce.
         max_beta (`float`): the maximum beta to use; use values lower than 1 to
                      prevent singularities.
+        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
+                     Choose from `cosine` or `exp`
     Returns:
         betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
     """
+    if alpha_transform_type == "cosine":
-    def alpha_bar(time_step):
-        return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
+        def alpha_bar_fn(t):
+            return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
+    elif alpha_transform_type == "exp":
+        def alpha_bar_fn(t):
+            return math.exp(t * -12.0)
+    else:
+        raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
     betas = []
     for i in range(num_diffusion_timesteps):
         t1 = i / num_diffusion_timesteps
         t2 = (i + 1) / num_diffusion_timesteps
-        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
+        betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
     return torch.tensor(betas, dtype=torch.float32)
@@ -77,6 +93,13 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
             prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
             process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
             https://imagen.research.google/video/paper.pdf)
+        timestep_spacing (`str`, default `"linspace"`):
+            The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
+            Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
+        steps_offset (`int`, default `0`):
+            an offset added to the inference steps. You can use a combination of `offset=1` and
+            `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
+            stable diffusion.
     """
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -91,6 +114,8 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
         beta_schedule: str = "linear",
         trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
         prediction_type: str = "epsilon",
+        timestep_spacing: str = "linspace",
+        steps_offset: int = 0,
     ):
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -120,12 +145,26 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
         indices = (schedule_timesteps == timestep).nonzero()
-        if self.state_in_first_order:
-            pos = -1
+        # The sigma index that is taken for the **very** first `step`
+        # is always the second index (or the last index if there is only 1)
+        # This way we can ensure we don't accidentally skip a sigma in
+        # case we start in the middle of the denoising schedule (e.g. for image-to-image)
+        if len(self._index_counter) == 0:
+            pos = 1 if len(indices) > 1 else 0
         else:
-            pos = 0
+            timestep_int = timestep.cpu().item() if torch.is_tensor(timestep) else timestep
+            pos = self._index_counter[timestep_int]
         return indices[pos].item()
+    @property
+    def init_noise_sigma(self):
+        # standard deviation of the initial noise distribution
+        if self.config.timestep_spacing in ["linspace", "trailing"]:
+            return self.sigmas.max()
+        return (self.sigmas.max() ** 2 + 1) ** 0.5
     def scale_model_input(
         self,
         sample: torch.FloatTensor,
@@ -168,7 +207,25 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
         num_train_timesteps = num_train_timesteps or self.config.num_train_timesteps
-        timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
+        # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
+        if self.config.timestep_spacing == "linspace":
+            timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
+        elif self.config.timestep_spacing == "leading":
+            step_ratio = num_train_timesteps // self.num_inference_steps
+            # creates integer timesteps by multiplying by ratio
+            # casting to int to avoid issues when num_inference_step is power of 3
+            timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(float)
+            timesteps += self.config.steps_offset
+        elif self.config.timestep_spacing == "trailing":
+            step_ratio = num_train_timesteps / self.num_inference_steps
+            # creates integer timesteps by multiplying by ratio
+            # casting to int to avoid issues when num_inference_step is power of 3
+            timesteps = (np.arange(num_train_timesteps, 0, -step_ratio)).round().copy().astype(float)
+            timesteps -= 1
+        else:
+            raise ValueError(
+                f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
+            )
         sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
         self.log_sigmas = torch.from_numpy(np.log(sigmas)).to(device)
@@ -185,9 +242,6 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
             [sigmas_interpol[:1], sigmas_interpol[1:].repeat_interleave(2), sigmas_interpol[-1:]]
         )
-        # standard deviation of the initial noise distribution
-        self.init_noise_sigma = self.sigmas.max()
         if str(device).startswith("mps"):
             # mps does not support float64
             timesteps = torch.from_numpy(timesteps).to(device, dtype=torch.float32)
@@ -202,6 +256,10 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
         self.sample = None
+        # for exp beta schedules, such as the one for `pipeline_shap_e.py`
+        # we need an index counter
+        self._index_counter = defaultdict(int)
     def sigma_to_t(self, sigma):
         # get log sigma
         log_sigma = sigma.log()
@@ -251,6 +309,10 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
         """
         step_index = self.index_for_timestep(timestep)
+        # advance index counter by 1
+        timestep_int = timestep.cpu().item() if torch.is_tensor(timestep) else timestep
+        self._index_counter[timestep_int] += 1
         if self.state_in_first_order:
             sigma = self.sigmas[step_index]
             sigma_interpol = self.sigmas_interpol[step_index + 1]

diffusers 0.17.1__py3-none-any.whl → 0.18.2__py3-none-any.whl

diffusers 0.17.1py3-none-any.whl → 0.18.2py3-none-any.whl