PyPI - diffusers - Versions diffs - 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl - Mend

diffusers 0.23.1py3-none-any.whl → 0.25.0py3-none-any.whl

Files changed (238) hide show

diffusers/pipelines/unclip/pipeline_unclip.py CHANGED Viewed

@@ -477,8 +477,9 @@ class UnCLIPPipeline(DiffusionPipeline):
         image = super_res_latents
         # done super res
-        # post processing
+        self.maybe_free_model_hooks()
+        # post processing
         image = image * 0.5 + 0.5
         image = image.clamp(0, 1)
         image = image.cpu().permute(0, 2, 3, 1).float().numpy()

diffusers/pipelines/unclip/pipeline_unclip_image_variation.py CHANGED Viewed

@@ -403,6 +403,7 @@ class UnCLIPImageVariationPipeline(DiffusionPipeline):
         image = super_res_latents
         # done super res
+        self.maybe_free_model_hooks()
         # post processing

diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py CHANGED Viewed

@@ -19,8 +19,8 @@ import torch
 import torch.nn as nn
 from ...configuration_utils import ConfigMixin, register_to_config
+from ...models.autoencoders.vae import DecoderOutput, VectorQuantizer
 from ...models.modeling_utils import ModelMixin
-from ...models.vae import DecoderOutput, VectorQuantizer
 from ...models.vq_model import VQEncoderOutput
 from ...utils.accelerate_utils import apply_forward_hook

diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py CHANGED Viewed

@@ -17,6 +17,8 @@ import torch
 import torch.nn as nn
 from ...models.attention_processor import Attention
+from ...models.lora import LoRACompatibleConv, LoRACompatibleLinear
+from ...utils import USE_PEFT_BACKEND
 class WuerstchenLayerNorm(nn.LayerNorm):
@@ -32,7 +34,8 @@ class WuerstchenLayerNorm(nn.LayerNorm):
 class TimestepBlock(nn.Module):
     def __init__(self, c, c_timestep):
         super().__init__()
-        self.mapper = nn.Linear(c_timestep, c * 2)
+        linear_cls = nn.Linear if USE_PEFT_BACKEND else LoRACompatibleLinear
+        self.mapper = linear_cls(c_timestep, c * 2)
     def forward(self, x, t):
         a, b = self.mapper(t)[:, :, None, None].chunk(2, dim=1)
@@ -42,10 +45,14 @@ class TimestepBlock(nn.Module):
 class ResBlock(nn.Module):
     def __init__(self, c, c_skip=0, kernel_size=3, dropout=0.0):
         super().__init__()
-        self.depthwise = nn.Conv2d(c + c_skip, c, kernel_size=kernel_size, padding=kernel_size // 2, groups=c)
+        conv_cls = nn.Conv2d if USE_PEFT_BACKEND else LoRACompatibleConv
+        linear_cls = nn.Linear if USE_PEFT_BACKEND else LoRACompatibleLinear
+        self.depthwise = conv_cls(c + c_skip, c, kernel_size=kernel_size, padding=kernel_size // 2, groups=c)
         self.norm = WuerstchenLayerNorm(c, elementwise_affine=False, eps=1e-6)
         self.channelwise = nn.Sequential(
-            nn.Linear(c, c * 4), nn.GELU(), GlobalResponseNorm(c * 4), nn.Dropout(dropout), nn.Linear(c * 4, c)
+            linear_cls(c, c * 4), nn.GELU(), GlobalResponseNorm(c * 4), nn.Dropout(dropout), linear_cls(c * 4, c)
         )
     def forward(self, x, x_skip=None):
@@ -73,10 +80,13 @@ class GlobalResponseNorm(nn.Module):
 class AttnBlock(nn.Module):
     def __init__(self, c, c_cond, nhead, self_attn=True, dropout=0.0):
         super().__init__()
+        linear_cls = nn.Linear if USE_PEFT_BACKEND else LoRACompatibleLinear
         self.self_attn = self_attn
         self.norm = WuerstchenLayerNorm(c, elementwise_affine=False, eps=1e-6)
         self.attention = Attention(query_dim=c, heads=nhead, dim_head=c // nhead, dropout=dropout, bias=True)
-        self.kv_mapper = nn.Sequential(nn.SiLU(), nn.Linear(c_cond, c))
+        self.kv_mapper = nn.Sequential(nn.SiLU(), linear_cls(c_cond, c))
     def forward(self, x, kv):
         kv = self.kv_mapper(kv)

diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py CHANGED Viewed

@@ -28,8 +28,9 @@ from ...models.attention_processor import (
     AttnAddedKVProcessor,
     AttnProcessor,
 )
+from ...models.lora import LoRACompatibleConv, LoRACompatibleLinear
 from ...models.modeling_utils import ModelMixin
-from ...utils import is_torch_version
+from ...utils import USE_PEFT_BACKEND, is_torch_version
 from .modeling_wuerstchen_common import AttnBlock, ResBlock, TimestepBlock, WuerstchenLayerNorm
@@ -40,12 +41,15 @@ class WuerstchenPrior(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin):
     @register_to_config
     def __init__(self, c_in=16, c=1280, c_cond=1024, c_r=64, depth=16, nhead=16, dropout=0.1):
         super().__init__()
+        conv_cls = nn.Conv2d if USE_PEFT_BACKEND else LoRACompatibleConv
+        linear_cls = nn.Linear if USE_PEFT_BACKEND else LoRACompatibleLinear
         self.c_r = c_r
-        self.projection = nn.Conv2d(c_in, c, kernel_size=1)
+        self.projection = conv_cls(c_in, c, kernel_size=1)
         self.cond_mapper = nn.Sequential(
-            nn.Linear(c_cond, c),
+            linear_cls(c_cond, c),
             nn.LeakyReLU(0.2),
-            nn.Linear(c, c),
+            linear_cls(c, c),
         )
         self.blocks = nn.ModuleList()
@@ -55,7 +59,7 @@ class WuerstchenPrior(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin):
             self.blocks.append(AttnBlock(c, c, nhead, self_attn=True, dropout=dropout))
         self.out = nn.Sequential(
             WuerstchenLayerNorm(c, elementwise_affine=False, eps=1e-6),
-            nn.Conv2d(c, c_in * 2, kernel_size=1),
+            conv_cls(c, c_in * 2, kernel_size=1),
         )
         self.gradient_checkpointing = False

diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py CHANGED Viewed

@@ -269,7 +269,7 @@ class WuerstchenDecoderPipeline(DiffusionPipeline):
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.
         Examples:

diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py CHANGED Viewed

@@ -234,7 +234,7 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
             prior_callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `prior_callback_on_step_end` function. The tensors specified in the
                 list will be passed as `callback_kwargs` argument. You will only be able to include variables listed in
-                the `._callback_tensor_inputs` attribute of your pipeine class.
+                the `._callback_tensor_inputs` attribute of your pipeline class.
             callback_on_step_end (`Callable`, *optional*):
                 A function that calls at the end of each denoising steps during the inference. The function is called
                 with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
@@ -243,7 +243,7 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.
         Examples:

diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py CHANGED Viewed

@@ -69,6 +69,10 @@ class WuerstchenPriorPipeline(DiffusionPipeline, LoraLoaderMixin):
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
     library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
+    The pipeline also inherits the following loading methods:
+        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
     Args:
         prior ([`Prior`]):
             The canonical unCLIP prior to approximate the image embedding from the text embedding.
@@ -349,7 +353,7 @@ class WuerstchenPriorPipeline(DiffusionPipeline, LoraLoaderMixin):
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.
         Examples:

diffusers/schedulers/__init__.py CHANGED Viewed

@@ -38,6 +38,8 @@ except OptionalDependencyNotAvailable:
     _dummy_modules.update(get_objects_from_module(dummy_pt_objects))
 else:
+    _import_structure["deprecated"] = ["KarrasVeScheduler", "ScoreSdeVpScheduler"]
+    _import_structure["scheduling_amused"] = ["AmusedScheduler"]
     _import_structure["scheduling_consistency_decoder"] = ["ConsistencyDecoderScheduler"]
     _import_structure["scheduling_consistency_models"] = ["CMStochasticIterativeScheduler"]
     _import_structure["scheduling_ddim"] = ["DDIMScheduler"]
@@ -56,12 +58,10 @@ else:
     _import_structure["scheduling_ipndm"] = ["IPNDMScheduler"]
     _import_structure["scheduling_k_dpm_2_ancestral_discrete"] = ["KDPM2AncestralDiscreteScheduler"]
     _import_structure["scheduling_k_dpm_2_discrete"] = ["KDPM2DiscreteScheduler"]
-    _import_structure["scheduling_karras_ve"] = ["KarrasVeScheduler"]
     _import_structure["scheduling_lcm"] = ["LCMScheduler"]
     _import_structure["scheduling_pndm"] = ["PNDMScheduler"]
     _import_structure["scheduling_repaint"] = ["RePaintScheduler"]
     _import_structure["scheduling_sde_ve"] = ["ScoreSdeVeScheduler"]
-    _import_structure["scheduling_sde_vp"] = ["ScoreSdeVpScheduler"]
     _import_structure["scheduling_unclip"] = ["UnCLIPScheduler"]
     _import_structure["scheduling_unipc_multistep"] = ["UniPCMultistepScheduler"]
     _import_structure["scheduling_utils"] = ["KarrasDiffusionSchedulers", "SchedulerMixin"]
@@ -129,6 +129,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     except OptionalDependencyNotAvailable:
         from ..utils.dummy_pt_objects import *  # noqa F403
     else:
+        from .deprecated import KarrasVeScheduler, ScoreSdeVpScheduler
+        from .scheduling_amused import AmusedScheduler
         from .scheduling_consistency_decoder import ConsistencyDecoderScheduler
         from .scheduling_consistency_models import CMStochasticIterativeScheduler
         from .scheduling_ddim import DDIMScheduler
@@ -147,12 +149,10 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .scheduling_ipndm import IPNDMScheduler
         from .scheduling_k_dpm_2_ancestral_discrete import KDPM2AncestralDiscreteScheduler
         from .scheduling_k_dpm_2_discrete import KDPM2DiscreteScheduler
-        from .scheduling_karras_ve import KarrasVeScheduler
         from .scheduling_lcm import LCMScheduler
         from .scheduling_pndm import PNDMScheduler
         from .scheduling_repaint import RePaintScheduler
         from .scheduling_sde_ve import ScoreSdeVeScheduler
-        from .scheduling_sde_vp import ScoreSdeVpScheduler
         from .scheduling_unclip import UnCLIPScheduler
         from .scheduling_unipc_multistep import UniPCMultistepScheduler
         from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin

diffusers/schedulers/deprecated/__init__.py ADDED Viewed

@@ -0,0 +1,50 @@
+from typing import TYPE_CHECKING
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+_dummy_objects = {}
+_import_structure = {}
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_pt_objects  # noqa F403
+    _dummy_objects.update(get_objects_from_module(dummy_pt_objects))
+else:
+    _import_structure["scheduling_karras_ve"] = ["KarrasVeScheduler"]
+    _import_structure["scheduling_sde_vp"] = ["ScoreSdeVpScheduler"]
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not is_torch_available():
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ..utils.dummy_pt_objects import *  # noqa F403
+    else:
+        from .scheduling_karras_ve import KarrasVeScheduler
+        from .scheduling_sde_vp import ScoreSdeVpScheduler
+else:
+    import sys
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)

diffusers/schedulers/{scheduling_karras_ve.py → deprecated/scheduling_karras_ve.py} RENAMED Viewed

@@ -19,10 +19,10 @@ from typing import Optional, Tuple, Union
 import numpy as np
 import torch
-from ..configuration_utils import ConfigMixin, register_to_config
-from ..utils import BaseOutput
-from ..utils.torch_utils import randn_tensor
-from .scheduling_utils import SchedulerMixin
+from ...configuration_utils import ConfigMixin, register_to_config
+from ...utils import BaseOutput
+from ...utils.torch_utils import randn_tensor
+from ..scheduling_utils import SchedulerMixin
 @dataclass

diffusers/schedulers/{scheduling_sde_vp.py → deprecated/scheduling_sde_vp.py} RENAMED Viewed

@@ -19,9 +19,9 @@ from typing import Union
 import torch
-from ..configuration_utils import ConfigMixin, register_to_config
-from ..utils.torch_utils import randn_tensor
-from .scheduling_utils import SchedulerMixin
+from ...configuration_utils import ConfigMixin, register_to_config
+from ...utils.torch_utils import randn_tensor
+from ..scheduling_utils import SchedulerMixin
 class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin):
@@ -79,9 +79,7 @@ class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin):
         # TODO(Patrick) better comments + non-PyTorch
         # postprocess model score
-        log_mean_coeff = (
-            -0.25 * t**2 * (self.config.beta_max - self.config.beta_min) - 0.5 * t * self.config.beta_min
-        )
+        log_mean_coeff = -0.25 * t**2 * (self.config.beta_max - self.config.beta_min) - 0.5 * t * self.config.beta_min
         std = torch.sqrt(1.0 - torch.exp(2.0 * log_mean_coeff))
         std = std.flatten()
         while len(std.shape) < len(score.shape):

diffusers/schedulers/scheduling_amused.py ADDED Viewed

@@ -0,0 +1,162 @@
+import math
+from dataclasses import dataclass
+from typing import List, Optional, Tuple, Union
+import torch
+from ..configuration_utils import ConfigMixin, register_to_config
+from ..utils import BaseOutput
+from .scheduling_utils import SchedulerMixin
+def gumbel_noise(t, generator=None):
+    device = generator.device if generator is not None else t.device
+    noise = torch.zeros_like(t, device=device).uniform_(0, 1, generator=generator).to(t.device)
+    return -torch.log((-torch.log(noise.clamp(1e-20))).clamp(1e-20))
+def mask_by_random_topk(mask_len, probs, temperature=1.0, generator=None):
+    confidence = torch.log(probs.clamp(1e-20)) + temperature * gumbel_noise(probs, generator=generator)
+    sorted_confidence = torch.sort(confidence, dim=-1).values
+    cut_off = torch.gather(sorted_confidence, 1, mask_len.long())
+    masking = confidence < cut_off
+    return masking
+@dataclass
+class AmusedSchedulerOutput(BaseOutput):
+    """
+    Output class for the scheduler's `step` function output.
+    Args:
+        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
+            denoising loop.
+        pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+            The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
+            `pred_original_sample` can be used to preview progress or for guidance.
+    """
+    prev_sample: torch.FloatTensor
+    pred_original_sample: torch.FloatTensor = None
+class AmusedScheduler(SchedulerMixin, ConfigMixin):
+    order = 1
+    temperatures: torch.Tensor
+    @register_to_config
+    def __init__(
+        self,
+        mask_token_id: int,
+        masking_schedule: str = "cosine",
+    ):
+        self.temperatures = None
+        self.timesteps = None
+    def set_timesteps(
+        self,
+        num_inference_steps: int,
+        temperature: Union[int, Tuple[int, int], List[int]] = (2, 0),
+        device: Union[str, torch.device] = None,
+    ):
+        self.timesteps = torch.arange(num_inference_steps, device=device).flip(0)
+        if isinstance(temperature, (tuple, list)):
+            self.temperatures = torch.linspace(temperature[0], temperature[1], num_inference_steps, device=device)
+        else:
+            self.temperatures = torch.linspace(temperature, 0.01, num_inference_steps, device=device)
+    def step(
+        self,
+        model_output: torch.FloatTensor,
+        timestep: torch.long,
+        sample: torch.LongTensor,
+        starting_mask_ratio: int = 1,
+        generator: Optional[torch.Generator] = None,
+        return_dict: bool = True,
+    ) -> Union[AmusedSchedulerOutput, Tuple]:
+        two_dim_input = sample.ndim == 3 and model_output.ndim == 4
+        if two_dim_input:
+            batch_size, codebook_size, height, width = model_output.shape
+            sample = sample.reshape(batch_size, height * width)
+            model_output = model_output.reshape(batch_size, codebook_size, height * width).permute(0, 2, 1)
+        unknown_map = sample == self.config.mask_token_id
+        probs = model_output.softmax(dim=-1)
+        device = probs.device
+        probs_ = probs.to(generator.device) if generator is not None else probs  # handles when generator is on CPU
+        if probs_.device.type == "cpu" and probs_.dtype != torch.float32:
+            probs_ = probs_.float()  # multinomial is not implemented for cpu half precision
+        probs_ = probs_.reshape(-1, probs.size(-1))
+        pred_original_sample = torch.multinomial(probs_, 1, generator=generator).to(device=device)
+        pred_original_sample = pred_original_sample[:, 0].view(*probs.shape[:-1])
+        pred_original_sample = torch.where(unknown_map, pred_original_sample, sample)
+        if timestep == 0:
+            prev_sample = pred_original_sample
+        else:
+            seq_len = sample.shape[1]
+            step_idx = (self.timesteps == timestep).nonzero()
+            ratio = (step_idx + 1) / len(self.timesteps)
+            if self.config.masking_schedule == "cosine":
+                mask_ratio = torch.cos(ratio * math.pi / 2)
+            elif self.config.masking_schedule == "linear":
+                mask_ratio = 1 - ratio
+            else:
+                raise ValueError(f"unknown masking schedule {self.config.masking_schedule}")
+            mask_ratio = starting_mask_ratio * mask_ratio
+            mask_len = (seq_len * mask_ratio).floor()
+            # do not mask more than amount previously masked
+            mask_len = torch.min(unknown_map.sum(dim=-1, keepdim=True) - 1, mask_len)
+            # mask at least one
+            mask_len = torch.max(torch.tensor([1], device=model_output.device), mask_len)
+            selected_probs = torch.gather(probs, -1, pred_original_sample[:, :, None])[:, :, 0]
+            # Ignores the tokens given in the input by overwriting their confidence.
+            selected_probs = torch.where(unknown_map, selected_probs, torch.finfo(selected_probs.dtype).max)
+            masking = mask_by_random_topk(mask_len, selected_probs, self.temperatures[step_idx], generator)
+            # Masks tokens with lower confidence.
+            prev_sample = torch.where(masking, self.config.mask_token_id, pred_original_sample)
+        if two_dim_input:
+            prev_sample = prev_sample.reshape(batch_size, height, width)
+            pred_original_sample = pred_original_sample.reshape(batch_size, height, width)
+        if not return_dict:
+            return (prev_sample, pred_original_sample)
+        return AmusedSchedulerOutput(prev_sample, pred_original_sample)
+    def add_noise(self, sample, timesteps, generator=None):
+        step_idx = (self.timesteps == timesteps).nonzero()
+        ratio = (step_idx + 1) / len(self.timesteps)
+        if self.config.masking_schedule == "cosine":
+            mask_ratio = torch.cos(ratio * math.pi / 2)
+        elif self.config.masking_schedule == "linear":
+            mask_ratio = 1 - ratio
+        else:
+            raise ValueError(f"unknown masking schedule {self.config.masking_schedule}")
+        mask_indices = (
+            torch.rand(
+                sample.shape, device=generator.device if generator is not None else sample.device, generator=generator
+            ).to(sample.device)
+            < mask_ratio
+        )
+        masked_sample = sample.clone()
+        masked_sample[mask_indices] = self.config.mask_token_id
+        return masked_sample

diffusers/schedulers/scheduling_consistency_models.py CHANGED Viewed

@@ -98,6 +98,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
         self.custom_timesteps = False
         self.is_scale_input_called = False
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     def index_for_timestep(self, timestep, schedule_timesteps=None):
         if schedule_timesteps is None:
@@ -230,6 +231,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
             self.timesteps = torch.from_numpy(timesteps).to(device=device)
         self._step_index = None
+        self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
     # Modified _convert_to_karras implementation that takes in ramp as argument
     def _convert_to_karras(self, ramp):

diffusers/schedulers/scheduling_ddim.py CHANGED Viewed

@@ -208,9 +208,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
         elif beta_schedule == "scaled_linear":
             # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
         elif beta_schedule == "squaredcos_cap_v2":
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)

diffusers/schedulers/scheduling_ddim_inverse.py CHANGED Viewed

@@ -204,9 +204,7 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
         elif beta_schedule == "scaled_linear":
             # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
         elif beta_schedule == "squaredcos_cap_v2":
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -295,9 +293,6 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
         model_output: torch.FloatTensor,
         timestep: int,
         sample: torch.FloatTensor,
-        eta: float = 0.0,
-        use_clipped_model_output: bool = False,
-        variance_noise: Optional[torch.FloatTensor] = None,
         return_dict: bool = True,
     ) -> Union[DDIMSchedulerOutput, Tuple]:
         """
@@ -334,7 +329,7 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
         # 1. get previous step value (=t+1)
         prev_timestep = timestep
         timestep = min(
-            timestep - self.config.num_train_timesteps // self.num_inference_steps, self.num_train_timesteps - 1
+            timestep - self.config.num_train_timesteps // self.num_inference_steps, self.config.num_train_timesteps - 1
         )
         # 2. compute alphas, betas

diffusers/schedulers/scheduling_ddim_parallel.py CHANGED Viewed

@@ -215,9 +215,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
         elif beta_schedule == "scaled_linear":
             # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
         elif beta_schedule == "squaredcos_cap_v2":
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)

diffusers/schedulers/scheduling_ddpm.py CHANGED Viewed

@@ -89,6 +89,43 @@ def betas_for_alpha_bar(
     return torch.tensor(betas, dtype=torch.float32)
+# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
+def rescale_zero_terminal_snr(betas):
+    """
+    Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
+    Args:
+        betas (`torch.FloatTensor`):
+            the betas that the scheduler is being initialized with.
+    Returns:
+        `torch.FloatTensor`: rescaled betas with zero terminal SNR
+    """
+    # Convert betas to alphas_bar_sqrt
+    alphas = 1.0 - betas
+    alphas_cumprod = torch.cumprod(alphas, dim=0)
+    alphas_bar_sqrt = alphas_cumprod.sqrt()
+    # Store old values.
+    alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+    alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+    # Shift so the last timestep is zero.
+    alphas_bar_sqrt -= alphas_bar_sqrt_T
+    # Scale so the first timestep is back to the old value.
+    alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
+    # Convert alphas_bar_sqrt to betas
+    alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
+    alphas = alphas_bar[1:] / alphas_bar[:-1]  # Revert cumprod
+    alphas = torch.cat([alphas_bar[0:1], alphas])
+    betas = 1 - alphas
+    return betas
 class DDPMScheduler(SchedulerMixin, ConfigMixin):
     """
     `DDPMScheduler` explores the connections between denoising score matching and Langevin dynamics sampling.
@@ -131,6 +168,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
             An offset added to the inference steps. You can use a combination of `offset=1` and
             `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
             Diffusion.
+        rescale_betas_zero_snr (`bool`, defaults to `False`):
+            Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
+            dark samples instead of limiting it to samples with medium brightness. Loosely related to
+            [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
     """
     _compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -153,6 +194,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
         sample_max_value: float = 1.0,
         timestep_spacing: str = "leading",
         steps_offset: int = 0,
+        rescale_betas_zero_snr: int = False,
     ):
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -160,9 +202,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
         elif beta_schedule == "scaled_linear":
             # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
         elif beta_schedule == "squaredcos_cap_v2":
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -173,6 +213,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
         else:
             raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
+        # Rescale for zero SNR
+        if rescale_betas_zero_snr:
+            self.betas = rescale_zero_terminal_snr(self.betas)
         self.alphas = 1.0 - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
         self.one = torch.tensor(1.0)

diffusers 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

diffusers 0.23.1py3-none-any.whl → 0.25.0py3-none-any.whl