PyPI - diffusers - Versions diffs - 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl - Mend

diffusers 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (278) hide show

diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py CHANGED Viewed

@@ -197,7 +197,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
             and not isinstance(image, list)
         ):
             raise ValueError(
-                "`image` has to be of type `torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
+                "`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
                 f" {type(image)}"
             )
@@ -214,7 +214,12 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
     def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -242,10 +247,10 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         **kwargs,
     ):
@@ -276,7 +281,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
             generator (`torch.Generator`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
@@ -287,7 +292,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.

diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py CHANGED Viewed

@@ -300,7 +300,12 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
     def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -328,10 +333,10 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         **kwargs,
     ):
@@ -362,7 +367,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
             generator (`torch.Generator`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
@@ -373,7 +378,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.

diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py CHANGED Viewed

@@ -169,10 +169,10 @@ class VQDiffusionPipeline(DiffusionPipeline):
         truncation_rate: float = 1.0,
         num_images_per_prompt: int = 1,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
     ) -> Union[ImagePipelineOutput, Tuple]:
         """
@@ -196,7 +196,7 @@ class VQDiffusionPipeline(DiffusionPipeline):
             generator (`torch.Generator`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor` of shape (batch), *optional*):
+            latents (`torch.Tensor` of shape (batch), *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Must be valid embedding indices.If not provided, a latents tensor will be generated of
                 completely masked latent pixels.
@@ -206,7 +206,7 @@ class VQDiffusionPipeline(DiffusionPipeline):
                 Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.
@@ -301,7 +301,7 @@ class VQDiffusionPipeline(DiffusionPipeline):
         return ImagePipelineOutput(images=image)
-    def truncate(self, log_p_x_0: torch.FloatTensor, truncation_rate: float) -> torch.FloatTensor:
+    def truncate(self, log_p_x_0: torch.Tensor, truncation_rate: float) -> torch.Tensor:
         """
         Truncates `log_p_x_0` such that for each column vector, the total cumulative probability is `truncation_rate`
         The lowest probabilities that would increase the cumulative probability above `truncation_rate` are set to

diffusers/pipelines/dit/pipeline_dit.py CHANGED Viewed

@@ -22,7 +22,7 @@ from typing import Dict, List, Optional, Tuple, Union
 import torch
-from ...models import AutoencoderKL, Transformer2DModel
+from ...models import AutoencoderKL, DiTTransformer2DModel
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
@@ -36,8 +36,8 @@ class DiTPipeline(DiffusionPipeline):
     implemented for all pipelines (downloading, saving, running on a particular device, etc.).
     Parameters:
-        transformer ([`Transformer2DModel`]):
-            A class conditioned `Transformer2DModel` to denoise the encoded image latents.
+        transformer ([`DiTTransformer2DModel`]):
+            A class conditioned `DiTTransformer2DModel` to denoise the encoded image latents.
         vae ([`AutoencoderKL`]):
             Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
         scheduler ([`DDIMScheduler`]):
@@ -48,7 +48,7 @@ class DiTPipeline(DiffusionPipeline):
     def __init__(
         self,
-        transformer: Transformer2DModel,
+        transformer: DiTTransformer2DModel,
         vae: AutoencoderKL,
         scheduler: KarrasDiffusionSchedulers,
         id2label: Optional[Dict[int, str]] = None,
@@ -227,6 +227,9 @@ class DiTPipeline(DiffusionPipeline):
         if output_type == "pil":
             samples = self.numpy_to_pil(samples)
+        # Offload all models
+        self.maybe_free_model_hooks()
         if not return_dict:
             return (samples,)

diffusers/pipelines/free_init_utils.py CHANGED Viewed

@@ -41,20 +41,20 @@ class FreeInitMixin:
             num_iters (`int`, *optional*, defaults to `3`):
                 Number of FreeInit noise re-initialization iterations.
             use_fast_sampling (`bool`, *optional*, defaults to `False`):
-                Whether or not to speedup sampling procedure at the cost of probably lower quality results. Enables
-                the "Coarse-to-Fine Sampling" strategy, as mentioned in the paper, if set to `True`.
+                Whether or not to speedup sampling procedure at the cost of probably lower quality results. Enables the
+                "Coarse-to-Fine Sampling" strategy, as mentioned in the paper, if set to `True`.
             method (`str`, *optional*, defaults to `butterworth`):
-                Must be one of `butterworth`, `ideal` or `gaussian` to use as the filtering method for the
-                FreeInit low pass filter.
+                Must be one of `butterworth`, `ideal` or `gaussian` to use as the filtering method for the FreeInit low
+                pass filter.
             order (`int`, *optional*, defaults to `4`):
                 Order of the filter used in `butterworth` method. Larger values lead to `ideal` method behaviour
                 whereas lower values lead to `gaussian` method behaviour.
             spatial_stop_frequency (`float`, *optional*, defaults to `0.25`):
-                Normalized stop frequency for spatial dimensions. Must be between 0 to 1. Referred to as `d_s` in
-                the original implementation.
+                Normalized stop frequency for spatial dimensions. Must be between 0 to 1. Referred to as `d_s` in the
+                original implementation.
             temporal_stop_frequency (`float`, *optional*, defaults to `0.25`):
-                Normalized stop frequency for temporal dimensions. Must be between 0 to 1. Referred to as `d_t` in
-                the original implementation.
+                Normalized stop frequency for temporal dimensions. Must be between 0 to 1. Referred to as `d_t` in the
+                original implementation.
         """
         self._free_init_num_iters = num_iters
         self._free_init_use_fast_sampling = use_fast_sampling
@@ -146,39 +146,40 @@ class FreeInitMixin:
     ):
         if free_init_iteration == 0:
             self._free_init_initial_noise = latents.detach().clone()
-            return latents, self.scheduler.timesteps
-        latent_shape = latents.shape
-        free_init_filter_shape = (1, *latent_shape[1:])
-        free_init_freq_filter = self._get_free_init_freq_filter(
-            shape=free_init_filter_shape,
-            device=device,
-            filter_type=self._free_init_method,
-            order=self._free_init_order,
-            spatial_stop_frequency=self._free_init_spatial_stop_frequency,
-            temporal_stop_frequency=self._free_init_temporal_stop_frequency,
-        )
-        current_diffuse_timestep = self.scheduler.config.num_train_timesteps - 1
-        diffuse_timesteps = torch.full((latent_shape[0],), current_diffuse_timestep).long()
-        z_t = self.scheduler.add_noise(
-            original_samples=latents, noise=self._free_init_initial_noise, timesteps=diffuse_timesteps.to(device)
-        ).to(dtype=torch.float32)
-        z_rand = randn_tensor(
-            shape=latent_shape,
-            generator=generator,
-            device=device,
-            dtype=torch.float32,
-        )
-        latents = self._apply_freq_filter(z_t, z_rand, low_pass_filter=free_init_freq_filter)
-        latents = latents.to(dtype)
+        else:
+            latent_shape = latents.shape
+            free_init_filter_shape = (1, *latent_shape[1:])
+            free_init_freq_filter = self._get_free_init_freq_filter(
+                shape=free_init_filter_shape,
+                device=device,
+                filter_type=self._free_init_method,
+                order=self._free_init_order,
+                spatial_stop_frequency=self._free_init_spatial_stop_frequency,
+                temporal_stop_frequency=self._free_init_temporal_stop_frequency,
+            )
+            current_diffuse_timestep = self.scheduler.config.num_train_timesteps - 1
+            diffuse_timesteps = torch.full((latent_shape[0],), current_diffuse_timestep).long()
+            z_t = self.scheduler.add_noise(
+                original_samples=latents, noise=self._free_init_initial_noise, timesteps=diffuse_timesteps.to(device)
+            ).to(dtype=torch.float32)
+            z_rand = randn_tensor(
+                shape=latent_shape,
+                generator=generator,
+                device=device,
+                dtype=torch.float32,
+            )
+            latents = self._apply_freq_filter(z_t, z_rand, low_pass_filter=free_init_freq_filter)
+            latents = latents.to(dtype)
         # Coarse-to-Fine Sampling for faster inference (can lead to lower quality)
         if self._free_init_use_fast_sampling:
-            num_inference_steps = int(num_inference_steps / self._free_init_num_iters * (free_init_iteration + 1))
+            num_inference_steps = max(
+                1, int(num_inference_steps / self._free_init_num_iters * (free_init_iteration + 1))
+            )
             self.scheduler.set_timesteps(num_inference_steps, device=device)
         return latents, self.scheduler.timesteps

diffusers/pipelines/hunyuandit/__init__.py ADDED Viewed

@@ -0,0 +1,48 @@
+from typing import TYPE_CHECKING
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+_dummy_objects = {}
+_import_structure = {}
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_hunyuandit"] = ["HunyuanDiTPipeline"]
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_hunyuandit import HunyuanDiTPipeline
+else:
+    import sys
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)

diffusers 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl

diffusers 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl