diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. diffusers/__init__.py +18 -1
  2. diffusers/callbacks.py +156 -0
  3. diffusers/commands/env.py +110 -6
  4. diffusers/configuration_utils.py +16 -11
  5. diffusers/dependency_versions_table.py +2 -1
  6. diffusers/image_processor.py +158 -45
  7. diffusers/loaders/__init__.py +2 -5
  8. diffusers/loaders/autoencoder.py +4 -4
  9. diffusers/loaders/controlnet.py +4 -4
  10. diffusers/loaders/ip_adapter.py +80 -22
  11. diffusers/loaders/lora.py +134 -20
  12. diffusers/loaders/lora_conversion_utils.py +46 -43
  13. diffusers/loaders/peft.py +4 -3
  14. diffusers/loaders/single_file.py +401 -170
  15. diffusers/loaders/single_file_model.py +290 -0
  16. diffusers/loaders/single_file_utils.py +616 -672
  17. diffusers/loaders/textual_inversion.py +41 -20
  18. diffusers/loaders/unet.py +168 -115
  19. diffusers/loaders/unet_loader_utils.py +163 -0
  20. diffusers/models/__init__.py +2 -0
  21. diffusers/models/activations.py +11 -3
  22. diffusers/models/attention.py +10 -11
  23. diffusers/models/attention_processor.py +367 -148
  24. diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
  25. diffusers/models/autoencoders/autoencoder_kl.py +18 -19
  26. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
  27. diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
  28. diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
  29. diffusers/models/autoencoders/vae.py +23 -24
  30. diffusers/models/controlnet.py +12 -9
  31. diffusers/models/controlnet_flax.py +4 -4
  32. diffusers/models/controlnet_xs.py +1915 -0
  33. diffusers/models/downsampling.py +17 -18
  34. diffusers/models/embeddings.py +147 -24
  35. diffusers/models/model_loading_utils.py +149 -0
  36. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  37. diffusers/models/modeling_flax_utils.py +4 -4
  38. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  39. diffusers/models/modeling_utils.py +118 -98
  40. diffusers/models/resnet.py +18 -23
  41. diffusers/models/transformer_temporal.py +3 -3
  42. diffusers/models/transformers/dual_transformer_2d.py +4 -4
  43. diffusers/models/transformers/prior_transformer.py +7 -7
  44. diffusers/models/transformers/t5_film_transformer.py +17 -19
  45. diffusers/models/transformers/transformer_2d.py +272 -156
  46. diffusers/models/transformers/transformer_temporal.py +10 -10
  47. diffusers/models/unets/unet_1d.py +5 -5
  48. diffusers/models/unets/unet_1d_blocks.py +29 -29
  49. diffusers/models/unets/unet_2d.py +6 -6
  50. diffusers/models/unets/unet_2d_blocks.py +137 -128
  51. diffusers/models/unets/unet_2d_condition.py +20 -15
  52. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  53. diffusers/models/unets/unet_3d_blocks.py +79 -77
  54. diffusers/models/unets/unet_3d_condition.py +13 -9
  55. diffusers/models/unets/unet_i2vgen_xl.py +14 -13
  56. diffusers/models/unets/unet_kandinsky3.py +1 -1
  57. diffusers/models/unets/unet_motion_model.py +114 -14
  58. diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
  59. diffusers/models/unets/unet_stable_cascade.py +16 -13
  60. diffusers/models/upsampling.py +17 -20
  61. diffusers/models/vq_model.py +16 -15
  62. diffusers/pipelines/__init__.py +25 -3
  63. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  64. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  65. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  66. diffusers/pipelines/animatediff/__init__.py +2 -0
  67. diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
  68. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
  69. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
  70. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  71. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  72. diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
  73. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
  74. diffusers/pipelines/auto_pipeline.py +21 -17
  75. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  76. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
  77. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  78. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  79. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
  80. diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
  81. diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
  82. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  83. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
  84. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
  85. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
  86. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
  87. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
  88. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  89. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
  90. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
  91. diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
  92. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
  93. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
  94. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
  95. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
  96. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
  97. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  98. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
  99. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
  100. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  101. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  102. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
  103. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
  104. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
  105. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
  106. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
  107. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
  108. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  109. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  110. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  111. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  112. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  113. diffusers/pipelines/dit/pipeline_dit.py +3 -0
  114. diffusers/pipelines/free_init_utils.py +39 -38
  115. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  116. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  117. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
  118. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  119. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  120. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  121. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  122. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
  123. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  124. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  125. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  126. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  127. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  128. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  129. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  130. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
  131. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
  132. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
  133. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
  134. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
  135. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
  136. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
  137. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  138. diffusers/pipelines/marigold/__init__.py +50 -0
  139. diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
  140. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  141. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  142. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  143. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  144. diffusers/pipelines/pia/pipeline_pia.py +39 -125
  145. diffusers/pipelines/pipeline_flax_utils.py +4 -4
  146. diffusers/pipelines/pipeline_loading_utils.py +268 -23
  147. diffusers/pipelines/pipeline_utils.py +266 -37
  148. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  149. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
  150. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
  151. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
  152. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  153. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  154. diffusers/pipelines/shap_e/renderer.py +1 -1
  155. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
  156. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  157. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
  158. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  159. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
  160. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  161. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  162. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
  163. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
  164. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  165. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
  166. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
  167. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
  168. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
  169. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
  170. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
  171. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
  172. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  173. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
  174. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
  175. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
  176. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
  177. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
  178. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
  179. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
  180. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
  181. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  182. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  183. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
  184. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
  185. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
  186. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
  187. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
  188. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  189. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  190. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
  191. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
  192. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  193. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
  194. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
  195. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
  196. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
  197. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  198. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  199. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  200. diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
  201. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
  202. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  203. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  204. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
  205. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  206. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  207. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
  208. diffusers/schedulers/__init__.py +2 -2
  209. diffusers/schedulers/deprecated/__init__.py +1 -1
  210. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  211. diffusers/schedulers/scheduling_amused.py +5 -5
  212. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  213. diffusers/schedulers/scheduling_consistency_models.py +23 -25
  214. diffusers/schedulers/scheduling_ddim.py +22 -24
  215. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  216. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  217. diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
  218. diffusers/schedulers/scheduling_ddpm.py +20 -22
  219. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  220. diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
  221. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  222. diffusers/schedulers/scheduling_deis_multistep.py +46 -42
  223. diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
  224. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  225. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
  226. diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
  227. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
  228. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
  229. diffusers/schedulers/scheduling_edm_euler.py +53 -30
  230. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
  231. diffusers/schedulers/scheduling_euler_discrete.py +163 -67
  232. diffusers/schedulers/scheduling_heun_discrete.py +60 -38
  233. diffusers/schedulers/scheduling_ipndm.py +8 -8
  234. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
  235. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
  236. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  237. diffusers/schedulers/scheduling_lcm.py +21 -23
  238. diffusers/schedulers/scheduling_lms_discrete.py +27 -25
  239. diffusers/schedulers/scheduling_pndm.py +20 -20
  240. diffusers/schedulers/scheduling_repaint.py +20 -20
  241. diffusers/schedulers/scheduling_sasolver.py +55 -54
  242. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  243. diffusers/schedulers/scheduling_tcd.py +39 -30
  244. diffusers/schedulers/scheduling_unclip.py +15 -15
  245. diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
  246. diffusers/schedulers/scheduling_utils.py +14 -5
  247. diffusers/schedulers/scheduling_utils_flax.py +3 -3
  248. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  249. diffusers/training_utils.py +56 -1
  250. diffusers/utils/__init__.py +7 -0
  251. diffusers/utils/doc_utils.py +1 -0
  252. diffusers/utils/dummy_pt_objects.py +30 -0
  253. diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
  254. diffusers/utils/dynamic_modules_utils.py +24 -11
  255. diffusers/utils/hub_utils.py +3 -2
  256. diffusers/utils/import_utils.py +91 -0
  257. diffusers/utils/loading_utils.py +2 -2
  258. diffusers/utils/logging.py +1 -1
  259. diffusers/utils/peft_utils.py +32 -5
  260. diffusers/utils/state_dict_utils.py +11 -2
  261. diffusers/utils/testing_utils.py +71 -6
  262. diffusers/utils/torch_utils.py +1 -0
  263. diffusers/video_processor.py +113 -0
  264. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
  265. diffusers-0.28.0.dist-info/RECORD +414 -0
  266. diffusers-0.27.1.dist-info/RECORD +0 -399
  267. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
  268. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
  269. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
  270. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
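Among the larger additions in the list above are the new Marigold dense-prediction pipelines (items 138–141). A minimal sketch of how the depth pipeline might be invoked; the checkpoint id, `MarigoldDepthPipeline` export, and `visualize_depth` helper are assumptions based on the files added here, not shown in this diff:

    import torch
    from diffusers import MarigoldDepthPipeline
    from diffusers.utils import load_image

    # Assumed checkpoint id and API names; see pipeline_marigold_depth.py above.
    pipe = MarigoldDepthPipeline.from_pretrained(
        "prs-eth/marigold-depth-lcm-v1-0", torch_dtype=torch.float16
    ).to("cuda")

    image = load_image("https://marigoldmonodepth.github.io/images/einstein.jpg")
    out = pipe(image)                                   # depth prediction(s)
    vis = pipe.image_processor.visualize_depth(out.prediction)
    vis[0].save("depth.png")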
diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py
@@ -40,15 +40,13 @@ class WuerstchenPrior(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin, Peft
     @register_to_config
     def __init__(self, c_in=16, c=1280, c_cond=1024, c_r=64, depth=16, nhead=16, dropout=0.1):
         super().__init__()
-        conv_cls = nn.Conv2d
-        linear_cls = nn.Linear
 
         self.c_r = c_r
-        self.projection = conv_cls(c_in, c, kernel_size=1)
+        self.projection = nn.Conv2d(c_in, c, kernel_size=1)
         self.cond_mapper = nn.Sequential(
-            linear_cls(c_cond, c),
+            nn.Linear(c_cond, c),
             nn.LeakyReLU(0.2),
-            linear_cls(c, c),
+            nn.Linear(c, c),
         )
 
         self.blocks = nn.ModuleList()
@@ -58,7 +56,7 @@ class WuerstchenPrior(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin, Peft
         self.blocks.append(AttnBlock(c, c, nhead, self_attn=True, dropout=dropout))
         self.out = nn.Sequential(
             WuerstchenLayerNorm(c, elementwise_affine=False, eps=1e-6),
-            conv_cls(c, c_in * 2, kernel_size=1),
+            nn.Conv2d(c, c_in * 2, kernel_size=1),
         )
 
         self.gradient_checkpointing = False
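The removed `conv_cls`/`linear_cls` aliases were a layer-class indirection (historically used to swap in LoRA-compatible layer variants); since both were already hard-bound to `nn.Conv2d`/`nn.Linear` here, the refactor is behavior-preserving. A minimal check illustrating the equivalence:

    import torch.nn as nn

    conv_cls = nn.Conv2d                           # 0.27.1: local alias indirection
    old_proj = conv_cls(16, 1280, kernel_size=1)
    new_proj = nn.Conv2d(16, 1280, kernel_size=1)  # 0.28.0: direct construction
    assert type(old_proj) is type(new_proj)        # identical module type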
diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py
@@ -209,7 +209,7 @@ class WuerstchenDecoderPipeline(DiffusionPipeline):
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
         self,
-        image_embeddings: Union[torch.FloatTensor, List[torch.FloatTensor]],
+        image_embeddings: Union[torch.Tensor, List[torch.Tensor]],
         prompt: Union[str, List[str]] = None,
         num_inference_steps: int = 12,
         timesteps: Optional[List[float]] = None,
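This `torch.FloatTensor` → `torch.Tensor` annotation change recurs throughout the release. `torch.FloatTensor` names only the float32 CPU tensor type, so the old hints were technically wrong for the fp16/bf16 and CUDA tensors these pipelines routinely accept; `torch.Tensor` covers every dtype and device. A quick demonstration:

    import torch

    x = torch.randn(1, 77, 1024, dtype=torch.float16)
    print(isinstance(x, torch.FloatTensor))  # False: fp16 never matched the old hint
    print(isinstance(x, torch.Tensor))       # True: matches the new hint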
@@ -217,7 +217,7 @@ class WuerstchenDecoderPipeline(DiffusionPipeline):
         negative_prompt: Optional[Union[str, List[str]]] = None,
         num_images_per_prompt: int = 1,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
@@ -228,7 +228,7 @@ class WuerstchenDecoderPipeline(DiffusionPipeline):
         Function invoked when calling the pipeline for generation.
 
         Args:
-            image_embedding (`torch.FloatTensor` or `List[torch.FloatTensor]`):
+            image_embedding (`torch.Tensor` or `List[torch.Tensor]`):
                 Image Embeddings either extracted from an image or generated by a Prior Model.
             prompt (`str` or `List[str]`):
                 The prompt or prompts to guide the image generation.
@@ -252,7 +252,7 @@ class WuerstchenDecoderPipeline(DiffusionPipeline):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py
@@ -112,25 +112,25 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
     def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
         self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
 
-    def enable_model_cpu_offload(self, gpu_id=0):
+    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
         to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
         method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
         `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
         """
-        self.prior_pipe.enable_model_cpu_offload(gpu_id=gpu_id)
-        self.decoder_pipe.enable_model_cpu_offload(gpu_id=gpu_id)
+        self.prior_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
+        self.decoder_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
         r"""
         Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
         Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
         GPU only when their specific submodule's `forward` method is called. Offloading happens on a submodule basis.
         Memory savings are higher than using `enable_model_cpu_offload`, but performance is lower.
         """
-        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
-        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
 
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
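The offload methods previously hard-coded an integer `gpu_id`; they now also forward an explicit `device` to the underlying prior and decoder pipelines, so non-default accelerators can be targeted. A minimal usage sketch (the checkpoint id is an assumption):

    import torch
    from diffusers import WuerstchenCombinedPipeline

    pipe = WuerstchenCombinedPipeline.from_pretrained(
        "warp-ai/wuerstchen", torch_dtype=torch.float16  # assumed checkpoint id
    )
    # New in 0.28.0: pass a device string/torch.device instead of only a CUDA gpu_id.
    pipe.enable_model_cpu_offload(device="cuda")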
@@ -154,11 +154,11 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
         decoder_timesteps: Optional[List[float]] = None,
         decoder_guidance_scale: float = 0.0,
         negative_prompt: Optional[Union[str, List[str]]] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         num_images_per_prompt: int = 1,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         prior_callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
@@ -176,10 +176,10 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
             negative_prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
                 if `guidance_scale` is less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings for the prior. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings for the prior. Can be used to easily tweak text inputs, *e.g.*
                 prompt weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt`
                 input argument.
@@ -218,7 +218,7 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py
@@ -54,12 +54,12 @@ class WuerstchenPriorPipelineOutput(BaseOutput):
     Output class for WuerstchenPriorPipeline.
 
     Args:
-        image_embeddings (`torch.FloatTensor` or `np.ndarray`)
+        image_embeddings (`torch.Tensor` or `np.ndarray`)
            Prior image embeddings for text prompt
 
    """
 
-    image_embeddings: Union[torch.FloatTensor, np.ndarray]
+    image_embeddings: Union[torch.Tensor, np.ndarray]
 
 
 class WuerstchenPriorPipeline(DiffusionPipeline, LoraLoaderMixin):
@@ -136,8 +136,8 @@ class WuerstchenPriorPipeline(DiffusionPipeline, LoraLoaderMixin):
         do_classifier_free_guidance,
         prompt=None,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
     ):
         if prompt is not None and isinstance(prompt, str):
             batch_size = 1
@@ -288,11 +288,11 @@ class WuerstchenPriorPipeline(DiffusionPipeline, LoraLoaderMixin):
         timesteps: List[float] = None,
         guidance_scale: float = 8.0,
         negative_prompt: Optional[Union[str, List[str]]] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         num_images_per_prompt: Optional[int] = 1,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pt",
         return_dict: bool = True,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
@@ -324,10 +324,10 @@ class WuerstchenPriorPipeline(DiffusionPipeline, LoraLoaderMixin):
             negative_prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
                 if `decoder_guidance_scale` is less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -336,7 +336,7 @@ class WuerstchenPriorPipeline(DiffusionPipeline, LoraLoaderMixin):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
diffusers/schedulers/__init__.py
@@ -68,7 +68,7 @@ else:
     _import_structure["scheduling_tcd"] = ["TCDScheduler"]
     _import_structure["scheduling_unclip"] = ["UnCLIPScheduler"]
     _import_structure["scheduling_unipc_multistep"] = ["UniPCMultistepScheduler"]
-    _import_structure["scheduling_utils"] = ["KarrasDiffusionSchedulers", "SchedulerMixin"]
+    _import_structure["scheduling_utils"] = ["AysSchedules", "KarrasDiffusionSchedulers", "SchedulerMixin"]
     _import_structure["scheduling_vq_diffusion"] = ["VQDiffusionScheduler"]
 
 try:
@@ -163,7 +163,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     from .scheduling_tcd import TCDScheduler
     from .scheduling_unclip import UnCLIPScheduler
     from .scheduling_unipc_multistep import UniPCMultistepScheduler
-    from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
+    from .scheduling_utils import AysSchedules, KarrasDiffusionSchedulers, SchedulerMixin
     from .scheduling_vq_diffusion import VQDiffusionScheduler
 
 try:
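`AysSchedules` is a newly exported collection of "Align Your Steps" timestep tables. A sketch of the intended use with a pipeline that accepts explicit `timesteps`; the dictionary key and checkpoint id below are assumptions, not shown in this diff:

    import torch
    from diffusers import StableDiffusionXLPipeline
    from diffusers.schedulers import AysSchedules

    sampling_schedule = AysSchedules["StableDiffusionXLTimesteps"]  # assumed key

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    ).to("cuda")
    # The precomputed 10-step AYS schedule is passed straight to the scheduler.
    image = pipe("a photo of a cat", timesteps=sampling_schedule).images[0]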
diffusers/schedulers/deprecated/__init__.py
@@ -30,7 +30,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         raise OptionalDependencyNotAvailable()
 
     except OptionalDependencyNotAvailable:
-        from ..utils.dummy_pt_objects import *  # noqa F403
+        from ...utils.dummy_pt_objects import *  # noqa F403
     else:
         from .scheduling_karras_ve import KarrasVeScheduler
         from .scheduling_sde_vp import ScoreSdeVpScheduler
diffusers/schedulers/deprecated/scheduling_karras_ve.py
@@ -31,19 +31,19 @@ class KarrasVeOutput(BaseOutput):
     Output class for the scheduler's step function output.
 
     Args:
-        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
            Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
            denoising loop.
-        derivative (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        derivative (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
            Derivative of predicted original image sample (x_0).
-        pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
            The predicted denoised sample (x_{0}) based on the model output from the current timestep.
            `pred_original_sample` can be used to preview progress or for guidance.
    """
 
-    prev_sample: torch.FloatTensor
-    derivative: torch.FloatTensor
-    pred_original_sample: Optional[torch.FloatTensor] = None
+    prev_sample: torch.Tensor
+    derivative: torch.Tensor
+    pred_original_sample: Optional[torch.Tensor] = None
 
 
 class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
@@ -94,21 +94,21 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
         # setable values
         self.num_inference_steps: int = None
         self.timesteps: np.IntTensor = None
-        self.schedule: torch.FloatTensor = None  # sigma(t_i)
+        self.schedule: torch.Tensor = None  # sigma(t_i)
 
-    def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor:
+    def scale_model_input(self, sample: torch.Tensor, timestep: Optional[int] = None) -> torch.Tensor:
         """
         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
         current timestep.
 
         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 The input sample.
             timestep (`int`, *optional*):
                 The current timestep in the diffusion chain.
 
         Returns:
-            `torch.FloatTensor`:
+            `torch.Tensor`:
                 A scaled input sample.
         """
         return sample
@@ -136,14 +136,14 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
         self.schedule = torch.tensor(schedule, dtype=torch.float32, device=device)
 
     def add_noise_to_input(
-        self, sample: torch.FloatTensor, sigma: float, generator: Optional[torch.Generator] = None
-    ) -> Tuple[torch.FloatTensor, float]:
+        self, sample: torch.Tensor, sigma: float, generator: Optional[torch.Generator] = None
+    ) -> Tuple[torch.Tensor, float]:
         """
         Explicit Langevin-like "churn" step of adding noise to the sample according to a `gamma_i ≥ 0` to reach a
         higher noise level `sigma_hat = sigma_i + gamma_i*sigma_i`.
 
         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 The input sample.
             sigma (`float`):
             generator (`torch.Generator`, *optional*):
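For context on the docstring above: the churn step raises the noise level before the solver step. A toy rendering of the quoted formula; the noise scaling term follows the Karras et al. (2022) EDM formulation and is an assumption here, as the hunk does not show the method body:

    import torch

    sigma, gamma = 1.0, 0.05
    sigma_hat = sigma + gamma * sigma                 # the docstring's formula

    sample = torch.randn(1, 3, 64, 64)
    eps = torch.randn_like(sample)
    sample_hat = sample + (sigma_hat**2 - sigma**2) ** 0.5 * eps  # assumed scaling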
@@ -163,10 +163,10 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
 
     def step(
         self,
-        model_output: torch.FloatTensor,
+        model_output: torch.Tensor,
         sigma_hat: float,
         sigma_prev: float,
-        sample_hat: torch.FloatTensor,
+        sample_hat: torch.Tensor,
         return_dict: bool = True,
     ) -> Union[KarrasVeOutput, Tuple]:
         """
@@ -174,11 +174,11 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
         process from the learned model outputs (most often the predicted noise).
 
         Args:
-            model_output (`torch.FloatTensor`):
+            model_output (`torch.Tensor`):
                 The direct output from learned diffusion model.
             sigma_hat (`float`):
             sigma_prev (`float`):
-            sample_hat (`torch.FloatTensor`):
+            sample_hat (`torch.Tensor`):
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~schedulers.scheduling_karras_ve.KarrasVESchedulerOutput`] or `tuple`.
 
@@ -202,25 +202,25 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
 
     def step_correct(
         self,
-        model_output: torch.FloatTensor,
+        model_output: torch.Tensor,
         sigma_hat: float,
         sigma_prev: float,
-        sample_hat: torch.FloatTensor,
-        sample_prev: torch.FloatTensor,
-        derivative: torch.FloatTensor,
+        sample_hat: torch.Tensor,
+        sample_prev: torch.Tensor,
+        derivative: torch.Tensor,
         return_dict: bool = True,
     ) -> Union[KarrasVeOutput, Tuple]:
         """
         Corrects the predicted sample based on the `model_output` of the network.
 
         Args:
-            model_output (`torch.FloatTensor`):
+            model_output (`torch.Tensor`):
                 The direct output from learned diffusion model.
             sigma_hat (`float`): TODO
             sigma_prev (`float`): TODO
-            sample_hat (`torch.FloatTensor`): TODO
-            sample_prev (`torch.FloatTensor`): TODO
-            derivative (`torch.FloatTensor`): TODO
+            sample_hat (`torch.Tensor`): TODO
+            sample_prev (`torch.Tensor`): TODO
+            derivative (`torch.Tensor`): TODO
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`.
 
diffusers/schedulers/scheduling_amused.py
@@ -29,16 +29,16 @@ class AmusedSchedulerOutput(BaseOutput):
     Output class for the scheduler's `step` function output.
 
     Args:
-        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
            denoising loop.
-        pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
            The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
            `pred_original_sample` can be used to preview progress or for guidance.
    """
 
-    prev_sample: torch.FloatTensor
-    pred_original_sample: torch.FloatTensor = None
+    prev_sample: torch.Tensor
+    pred_original_sample: torch.Tensor = None
 
 
 class AmusedScheduler(SchedulerMixin, ConfigMixin):
@@ -70,7 +70,7 @@ class AmusedScheduler(SchedulerMixin, ConfigMixin):
 
     def step(
         self,
-        model_output: torch.FloatTensor,
+        model_output: torch.Tensor,
         timestep: torch.long,
         sample: torch.LongTensor,
         starting_mask_ratio: int = 1,
diffusers/schedulers/scheduling_consistency_decoder.py
@@ -45,7 +45,7 @@ def betas_for_alpha_bar(
             return math.exp(t * -12.0)
 
     else:
-        raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
+        raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}")
 
     betas = []
     for i in range(num_diffusion_timesteps):
@@ -61,12 +61,12 @@ class ConsistencyDecoderSchedulerOutput(BaseOutput):
     Output class for the scheduler's `step` function.
 
     Args:
-        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
            denoising loop.
    """
 
-    prev_sample: torch.FloatTensor
+    prev_sample: torch.Tensor
 
 
 class ConsistencyDecoderScheduler(SchedulerMixin, ConfigMixin):
@@ -113,28 +113,28 @@ class ConsistencyDecoderScheduler(SchedulerMixin, ConfigMixin):
     def init_noise_sigma(self):
         return self.sqrt_one_minus_alphas_cumprod[self.timesteps[0]]
 
-    def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor:
+    def scale_model_input(self, sample: torch.Tensor, timestep: Optional[int] = None) -> torch.Tensor:
         """
         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
         current timestep.
 
         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 The input sample.
             timestep (`int`, *optional*):
                 The current timestep in the diffusion chain.
 
         Returns:
-            `torch.FloatTensor`:
+            `torch.Tensor`:
                 A scaled input sample.
         """
         return sample * self.c_in[timestep]
 
     def step(
         self,
-        model_output: torch.FloatTensor,
-        timestep: Union[float, torch.FloatTensor],
-        sample: torch.FloatTensor,
+        model_output: torch.Tensor,
+        timestep: Union[float, torch.Tensor],
+        sample: torch.Tensor,
         generator: Optional[torch.Generator] = None,
         return_dict: bool = True,
     ) -> Union[ConsistencyDecoderSchedulerOutput, Tuple]:
@@ -143,11 +143,11 @@ class ConsistencyDecoderScheduler(SchedulerMixin, ConfigMixin):
         process from the learned model outputs (most often the predicted noise).
 
         Args:
-            model_output (`torch.FloatTensor`):
+            model_output (`torch.Tensor`):
                 The direct output from the learned diffusion model.
             timestep (`float`):
                 The current timestep in the diffusion chain.
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 A current instance of a sample created by the diffusion process.
             generator (`torch.Generator`, *optional*):
                 A random number generator.
diffusers/schedulers/scheduling_consistency_models.py
@@ -33,12 +33,12 @@ class CMStochasticIterativeSchedulerOutput(BaseOutput):
     Output class for the scheduler's `step` function.
 
     Args:
-        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
            denoising loop.
    """
 
-    prev_sample: torch.FloatTensor
+    prev_sample: torch.Tensor
 
 
 class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
@@ -104,7 +104,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
     @property
     def step_index(self):
         """
-        The index counter for current timestep. It will increae 1 after each scheduler step.
+        The index counter for current timestep. It will increase 1 after each scheduler step.
         """
         return self._step_index
 
@@ -126,20 +126,18 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
         """
         self._begin_index = begin_index
 
-    def scale_model_input(
-        self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor]
-    ) -> torch.FloatTensor:
+    def scale_model_input(self, sample: torch.Tensor, timestep: Union[float, torch.Tensor]) -> torch.Tensor:
         """
         Scales the consistency model input by `(sigma**2 + sigma_data**2) ** 0.5`.
 
         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 The input sample.
-            timestep (`float` or `torch.FloatTensor`):
+            timestep (`float` or `torch.Tensor`):
                 The current timestep in the diffusion chain.
 
         Returns:
-            `torch.FloatTensor`:
+            `torch.Tensor`:
                 A scaled input sample.
         """
         # Get sigma corresponding to timestep
@@ -233,7 +231,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
         sigmas = self._convert_to_karras(ramp)
         timesteps = self.sigma_to_t(sigmas)
 
-        sigmas = np.concatenate([sigmas, [self.sigma_min]]).astype(np.float32)
+        sigmas = np.concatenate([sigmas, [self.config.sigma_min]]).astype(np.float32)
         self.sigmas = torch.from_numpy(sigmas).to(device=device)
 
         if str(device).startswith("mps"):
@@ -278,7 +276,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
         </Tip>
 
         Args:
-            sigma (`torch.FloatTensor`):
+            sigma (`torch.Tensor`):
                 The current sigma in the Karras sigma schedule.
 
         Returns:
@@ -319,9 +317,9 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
 
     def step(
         self,
-        model_output: torch.FloatTensor,
-        timestep: Union[float, torch.FloatTensor],
-        sample: torch.FloatTensor,
+        model_output: torch.Tensor,
+        timestep: Union[float, torch.Tensor],
+        sample: torch.Tensor,
         generator: Optional[torch.Generator] = None,
         return_dict: bool = True,
     ) -> Union[CMStochasticIterativeSchedulerOutput, Tuple]:
@@ -330,11 +328,11 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
         process from the learned model outputs (most often the predicted noise).
 
         Args:
-            model_output (`torch.FloatTensor`):
+            model_output (`torch.Tensor`):
                 The direct output from the learned diffusion model.
             timestep (`float`):
                 The current timestep in the diffusion chain.
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 A current instance of a sample created by the diffusion process.
             generator (`torch.Generator`, *optional*):
                 A random number generator.
@@ -349,11 +347,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
             otherwise a tuple is returned where the first element is the sample tensor.
         """
 
-        if (
-            isinstance(timestep, int)
-            or isinstance(timestep, torch.IntTensor)
-            or isinstance(timestep, torch.LongTensor)
-        ):
+        if isinstance(timestep, (int, torch.IntTensor, torch.LongTensor)):
             raise ValueError(
                 (
                     "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
@@ -417,10 +411,10 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise
     def add_noise(
         self,
-        original_samples: torch.FloatTensor,
-        noise: torch.FloatTensor,
-        timesteps: torch.FloatTensor,
-    ) -> torch.FloatTensor:
+        original_samples: torch.Tensor,
+        noise: torch.Tensor,
+        timesteps: torch.Tensor,
+    ) -> torch.Tensor:
         # Make sure sigmas and timesteps have the same device and dtype as original_samples
         sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
         if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
@@ -434,7 +428,11 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
         # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
         if self.begin_index is None:
             step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
+        elif self.step_index is not None:
+            # add_noise is called after first denoising step (for inpainting)
+            step_indices = [self.step_index] * timesteps.shape[0]
         else:
+            # add noise is called before first denoising step to create initial latent(img2img)
             step_indices = [self.begin_index] * timesteps.shape[0]
 
         sigma = sigmas[step_indices].flatten()
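The new branch distinguishes the two call sites of `add_noise`: before the denoising loop (img2img, where `begin_index` applies) and after the first step (inpainting, where the live `step_index` must be reused). A self-contained sketch of the selection logic as it now reads:

    def select_step_indices(begin_index, step_index, timesteps, index_for_timestep):
        if begin_index is None:
            # training, or the pipeline never called set_begin_index
            return [index_for_timestep(t) for t in timesteps]
        if step_index is not None:
            # add_noise called after the first denoising step (inpainting)
            return [step_index] * len(timesteps)
        # add_noise called before the first denoising step (img2img)
        return [begin_index] * len(timesteps)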