diffusers 0.30.3__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +97 -4
- diffusers/callbacks.py +56 -3
- diffusers/configuration_utils.py +13 -1
- diffusers/image_processor.py +282 -71
- diffusers/loaders/__init__.py +24 -3
- diffusers/loaders/ip_adapter.py +543 -16
- diffusers/loaders/lora_base.py +138 -125
- diffusers/loaders/lora_conversion_utils.py +647 -0
- diffusers/loaders/lora_pipeline.py +2216 -230
- diffusers/loaders/peft.py +380 -0
- diffusers/loaders/single_file_model.py +71 -4
- diffusers/loaders/single_file_utils.py +597 -10
- diffusers/loaders/textual_inversion.py +5 -3
- diffusers/loaders/transformer_flux.py +181 -0
- diffusers/loaders/transformer_sd3.py +89 -0
- diffusers/loaders/unet.py +56 -12
- diffusers/models/__init__.py +49 -12
- diffusers/models/activations.py +22 -9
- diffusers/models/adapter.py +53 -53
- diffusers/models/attention.py +98 -13
- diffusers/models/attention_flax.py +1 -1
- diffusers/models/attention_processor.py +2160 -346
- diffusers/models/autoencoders/__init__.py +5 -0
- diffusers/models/autoencoders/autoencoder_dc.py +620 -0
- diffusers/models/autoencoders/autoencoder_kl.py +73 -12
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +213 -105
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +3 -10
- diffusers/models/autoencoders/autoencoder_tiny.py +4 -2
- diffusers/models/autoencoders/vae.py +18 -5
- diffusers/models/controlnet.py +47 -802
- diffusers/models/controlnet_flux.py +70 -0
- diffusers/models/controlnet_sd3.py +26 -376
- diffusers/models/controlnet_sparsectrl.py +46 -719
- diffusers/models/controlnets/__init__.py +23 -0
- diffusers/models/controlnets/controlnet.py +872 -0
- diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +5 -5
- diffusers/models/controlnets/controlnet_flux.py +536 -0
- diffusers/models/{controlnet_hunyuan.py → controlnets/controlnet_hunyuan.py} +7 -7
- diffusers/models/controlnets/controlnet_sd3.py +489 -0
- diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
- diffusers/models/controlnets/controlnet_union.py +832 -0
- diffusers/models/{controlnet_xs.py → controlnets/controlnet_xs.py} +14 -13
- diffusers/models/controlnets/multicontrolnet.py +183 -0
- diffusers/models/embeddings.py +996 -92
- diffusers/models/embeddings_flax.py +23 -9
- diffusers/models/model_loading_utils.py +264 -14
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +334 -51
- diffusers/models/normalization.py +157 -13
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +3 -2
- diffusers/models/transformers/cogvideox_transformer_3d.py +69 -13
- diffusers/models/transformers/dit_transformer_2d.py +1 -1
- diffusers/models/transformers/latte_transformer_3d.py +4 -4
- diffusers/models/transformers/pixart_transformer_2d.py +10 -2
- diffusers/models/transformers/sana_transformer.py +488 -0
- diffusers/models/transformers/stable_audio_transformer.py +1 -1
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +422 -0
- diffusers/models/transformers/transformer_cogview3plus.py +386 -0
- diffusers/models/transformers/transformer_flux.py +189 -51
- diffusers/models/transformers/transformer_hunyuan_video.py +789 -0
- diffusers/models/transformers/transformer_ltx.py +469 -0
- diffusers/models/transformers/transformer_mochi.py +499 -0
- diffusers/models/transformers/transformer_sd3.py +112 -18
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +8 -1
- diffusers/models/unets/unet_2d_blocks.py +88 -21
- diffusers/models/unets/unet_2d_condition.py +9 -9
- diffusers/models/unets/unet_3d_blocks.py +9 -7
- diffusers/models/unets/unet_motion_model.py +46 -68
- diffusers/models/unets/unet_spatio_temporal_condition.py +23 -0
- diffusers/models/unets/unet_stable_cascade.py +2 -2
- diffusers/models/unets/uvit_2d.py +1 -1
- diffusers/models/upsampling.py +14 -6
- diffusers/pipelines/__init__.py +69 -6
- diffusers/pipelines/allegro/__init__.py +48 -0
- diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
- diffusers/pipelines/allegro/pipeline_output.py +23 -0
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +52 -22
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +3 -1
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -72
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +3 -3
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +2 -9
- diffusers/pipelines/auto_pipeline.py +88 -10
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/cogvideo/__init__.py +2 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +80 -39
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +825 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +108 -50
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +89 -50
- diffusers/pipelines/cogview3/__init__.py +47 -0
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
- diffusers/pipelines/cogview3/pipeline_output.py +21 -0
- diffusers/pipelines/controlnet/__init__.py +86 -80
- diffusers/pipelines/controlnet/multicontrolnet.py +7 -178
- diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +9 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +9 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +37 -15
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +12 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +9 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +22 -4
- diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +56 -20
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
- diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +32 -9
- diffusers/pipelines/flux/__init__.py +23 -1
- diffusers/pipelines/flux/modeling_flux.py +47 -0
- diffusers/pipelines/flux/pipeline_flux.py +256 -48
- diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +1006 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +998 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1204 -0
- diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
- diffusers/pipelines/flux/pipeline_flux_img2img.py +856 -0
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +1022 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
- diffusers/pipelines/flux/pipeline_output.py +16 -0
- diffusers/pipelines/free_noise_utils.py +365 -5
- diffusers/pipelines/hunyuan_video/__init__.py +48 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
- diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +20 -4
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +9 -9
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
- diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
- diffusers/pipelines/kolors/text_encoder.py +2 -2
- diffusers/pipelines/kolors/tokenizer.py +4 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latte/pipeline_latte.py +2 -2
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
- diffusers/pipelines/ltx/__init__.py +50 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
- diffusers/pipelines/ltx/pipeline_output.py +20 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +3 -10
- diffusers/pipelines/mochi/__init__.py +48 -0
- diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
- diffusers/pipelines/mochi/pipeline_output.py +20 -0
- diffusers/pipelines/pag/__init__.py +13 -0
- diffusers/pipelines/pag/pag_utils.py +8 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +2 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1543 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1683 -0
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +22 -6
- diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +7 -14
- diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
- diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +18 -9
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1094 -0
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
- diffusers/pipelines/pia/pipeline_pia.py +2 -0
- diffusers/pipelines/pipeline_flax_utils.py +1 -1
- diffusers/pipelines/pipeline_loading_utils.py +250 -31
- diffusers/pipelines/pipeline_utils.py +158 -186
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +7 -14
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +7 -14
- diffusers/pipelines/sana/__init__.py +47 -0
- diffusers/pipelines/sana/pipeline_output.py +21 -0
- diffusers/pipelines/sana/pipeline_sana.py +884 -0
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +12 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +46 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +228 -23
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +82 -13
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +60 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -1
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -22
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -22
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
- diffusers/pipelines/unidiffuser/modeling_uvit.py +2 -2
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/quantizers/__init__.py +16 -0
- diffusers/quantizers/auto.py +139 -0
- diffusers/quantizers/base.py +233 -0
- diffusers/quantizers/bitsandbytes/__init__.py +2 -0
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +561 -0
- diffusers/quantizers/bitsandbytes/utils.py +306 -0
- diffusers/quantizers/gguf/__init__.py +1 -0
- diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
- diffusers/quantizers/gguf/utils.py +456 -0
- diffusers/quantizers/quantization_config.py +669 -0
- diffusers/quantizers/torchao/__init__.py +15 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +285 -0
- diffusers/schedulers/scheduling_ddim.py +4 -1
- diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
- diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
- diffusers/schedulers/scheduling_ddpm.py +6 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +6 -7
- diffusers/schedulers/scheduling_deis_multistep.py +102 -6
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +113 -6
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +111 -5
- diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +126 -7
- diffusers/schedulers/scheduling_edm_euler.py +8 -6
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
- diffusers/schedulers/scheduling_euler_discrete.py +92 -7
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +153 -6
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
- diffusers/schedulers/scheduling_heun_discrete.py +114 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
- diffusers/schedulers/scheduling_lcm.py +2 -6
- diffusers/schedulers/scheduling_lms_discrete.py +76 -1
- diffusers/schedulers/scheduling_repaint.py +1 -1
- diffusers/schedulers/scheduling_sasolver.py +102 -6
- diffusers/schedulers/scheduling_tcd.py +2 -6
- diffusers/schedulers/scheduling_unclip.py +4 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +127 -5
- diffusers/training_utils.py +63 -19
- diffusers/utils/__init__.py +7 -1
- diffusers/utils/constants.py +1 -0
- diffusers/utils/dummy_pt_objects.py +240 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +435 -0
- diffusers/utils/dynamic_modules_utils.py +3 -3
- diffusers/utils/hub_utils.py +44 -40
- diffusers/utils/import_utils.py +98 -8
- diffusers/utils/loading_utils.py +28 -4
- diffusers/utils/peft_utils.py +6 -3
- diffusers/utils/testing_utils.py +115 -1
- diffusers/utils/torch_utils.py +3 -0
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/METADATA +73 -72
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/RECORD +268 -193
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/WHEEL +1 -1
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/LICENSE +0 -0
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,20 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
|
3
|
+
import torch
|
4
|
+
|
5
|
+
from diffusers.utils import BaseOutput
|
6
|
+
|
7
|
+
|
8
|
+
@dataclass
|
9
|
+
class HunyuanVideoPipelineOutput(BaseOutput):
|
10
|
+
r"""
|
11
|
+
Output class for HunyuanVideo pipelines.
|
12
|
+
|
13
|
+
Args:
|
14
|
+
frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
|
15
|
+
List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing
|
16
|
+
denoised PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
|
17
|
+
`(batch_size, num_frames, channels, height, width)`.
|
18
|
+
"""
|
19
|
+
|
20
|
+
frames: torch.Tensor
|
@@ -125,9 +125,21 @@ def get_resize_crop_region_for_grid(src, tgt_size):
|
|
125
125
|
|
126
126
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
|
127
127
|
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
128
|
-
"""
|
129
|
-
|
130
|
-
|
128
|
+
r"""
|
129
|
+
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
130
|
+
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
131
|
+
Flawed](https://arxiv.org/pdf/2305.08891.pdf).
|
132
|
+
|
133
|
+
Args:
|
134
|
+
noise_cfg (`torch.Tensor`):
|
135
|
+
The predicted noise tensor for the guided diffusion process.
|
136
|
+
noise_pred_text (`torch.Tensor`):
|
137
|
+
The predicted noise tensor for the text-guided diffusion process.
|
138
|
+
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
139
|
+
A rescale factor applied to the noise predictions.
|
140
|
+
|
141
|
+
Returns:
|
142
|
+
noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
|
131
143
|
"""
|
132
144
|
std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
|
133
145
|
std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
|
@@ -786,7 +798,11 @@ class HunyuanDiTPipeline(DiffusionPipeline):
|
|
786
798
|
base_size = 512 // 8 // self.transformer.config.patch_size
|
787
799
|
grid_crops_coords = get_resize_crop_region_for_grid((grid_height, grid_width), base_size)
|
788
800
|
image_rotary_emb = get_2d_rotary_pos_embed(
|
789
|
-
self.transformer.inner_dim // self.transformer.num_heads,
|
801
|
+
self.transformer.inner_dim // self.transformer.num_heads,
|
802
|
+
grid_crops_coords,
|
803
|
+
(grid_height, grid_width),
|
804
|
+
device=device,
|
805
|
+
output_type="pt",
|
790
806
|
)
|
791
807
|
|
792
808
|
style = torch.tensor([0], device=device)
|
@@ -193,15 +193,15 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
|
|
193
193
|
def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
|
194
194
|
self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
|
195
195
|
|
196
|
-
def enable_sequential_cpu_offload(self, gpu_id=
|
196
|
+
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
|
197
197
|
r"""
|
198
198
|
Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
|
199
199
|
Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
|
200
200
|
GPU only when their specific submodule's `forward` method is called. Offloading happens on a submodule basis.
|
201
201
|
Memory savings are higher than using `enable_model_cpu_offload`, but performance is lower.
|
202
202
|
"""
|
203
|
-
self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
|
204
|
-
self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
|
203
|
+
self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
|
204
|
+
self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
|
205
205
|
|
206
206
|
def progress_bar(self, iterable=None, total=None):
|
207
207
|
self.prior_pipe.progress_bar(iterable=iterable, total=total)
|
@@ -411,7 +411,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
|
|
411
411
|
def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
|
412
412
|
self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
|
413
413
|
|
414
|
-
def enable_sequential_cpu_offload(self, gpu_id=
|
414
|
+
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
|
415
415
|
r"""
|
416
416
|
Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
|
417
417
|
text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
|
@@ -419,8 +419,8 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
|
|
419
419
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
420
420
|
`enable_model_cpu_offload`, but performance is lower.
|
421
421
|
"""
|
422
|
-
self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
|
423
|
-
self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
|
422
|
+
self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
|
423
|
+
self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
|
424
424
|
|
425
425
|
def progress_bar(self, iterable=None, total=None):
|
426
426
|
self.prior_pipe.progress_bar(iterable=iterable, total=total)
|
@@ -652,7 +652,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
|
|
652
652
|
def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
|
653
653
|
self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
|
654
654
|
|
655
|
-
def enable_sequential_cpu_offload(self, gpu_id=
|
655
|
+
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
|
656
656
|
r"""
|
657
657
|
Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
|
658
658
|
text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
|
@@ -660,8 +660,8 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
|
|
660
660
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
661
661
|
`enable_model_cpu_offload`, but performance is lower.
|
662
662
|
"""
|
663
|
-
self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
|
664
|
-
self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
|
663
|
+
self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
|
664
|
+
self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
|
665
665
|
|
666
666
|
def progress_bar(self, iterable=None, total=None):
|
667
667
|
self.prior_pipe.progress_bar(iterable=iterable, total=total)
|
@@ -547,7 +547,7 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
|
|
547
547
|
negative_image_embeds = prior_outputs[1]
|
548
548
|
|
549
549
|
prompt = [prompt] if not isinstance(prompt, (list, tuple)) else prompt
|
550
|
-
image = [image] if isinstance(
|
550
|
+
image = [image] if isinstance(image, PIL.Image.Image) else image
|
551
551
|
|
552
552
|
if len(prompt) < image_embeds.shape[0] and image_embeds.shape[0] % len(prompt) == 0:
|
553
553
|
prompt = (image_embeds.shape[0] // len(prompt)) * prompt
|
@@ -813,7 +813,7 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
|
|
813
813
|
negative_image_embeds = prior_outputs[1]
|
814
814
|
|
815
815
|
prompt = [prompt] if not isinstance(prompt, (list, tuple)) else prompt
|
816
|
-
image = [image] if isinstance(
|
816
|
+
image = [image] if isinstance(image, PIL.Image.Image) else image
|
817
817
|
mask_image = [mask_image] if isinstance(mask_image, PIL.Image.Image) else mask_image
|
818
818
|
|
819
819
|
if len(prompt) < image_embeds.shape[0] and image_embeds.shape[0] % len(prompt) == 0:
|
@@ -70,7 +70,7 @@ def retrieve_timesteps(
|
|
70
70
|
sigmas: Optional[List[float]] = None,
|
71
71
|
**kwargs,
|
72
72
|
):
|
73
|
-
"""
|
73
|
+
r"""
|
74
74
|
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
|
75
75
|
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
|
76
76
|
|
@@ -89,7 +89,7 @@ def retrieve_timesteps(
|
|
89
89
|
sigmas: Optional[List[float]] = None,
|
90
90
|
**kwargs,
|
91
91
|
):
|
92
|
-
"""
|
92
|
+
r"""
|
93
93
|
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
|
94
94
|
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
|
95
95
|
|
@@ -564,14 +564,16 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
|
|
564
564
|
if denoising_start is None:
|
565
565
|
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
|
566
566
|
t_start = max(num_inference_steps - init_timestep, 0)
|
567
|
-
else:
|
568
|
-
t_start = 0
|
569
567
|
|
570
|
-
|
568
|
+
timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
|
569
|
+
if hasattr(self.scheduler, "set_begin_index"):
|
570
|
+
self.scheduler.set_begin_index(t_start * self.scheduler.order)
|
571
571
|
|
572
|
-
|
573
|
-
|
574
|
-
|
572
|
+
return timesteps, num_inference_steps - t_start
|
573
|
+
|
574
|
+
else:
|
575
|
+
# Strength is irrelevant if we directly request a timestep to start at;
|
576
|
+
# that is, strength is determined by the denoising_start instead.
|
575
577
|
discrete_timestep_cutoff = int(
|
576
578
|
round(
|
577
579
|
self.scheduler.config.num_train_timesteps
|
@@ -579,7 +581,7 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
|
|
579
581
|
)
|
580
582
|
)
|
581
583
|
|
582
|
-
num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
|
584
|
+
num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
|
583
585
|
if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
|
584
586
|
# if the scheduler is a 2nd order scheduler we might have to do +1
|
585
587
|
# because `num_inference_steps` might be even given that every timestep
|
@@ -590,11 +592,12 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
|
|
590
592
|
num_inference_steps = num_inference_steps + 1
|
591
593
|
|
592
594
|
# because t_n+1 >= t_n, we slice the timesteps starting from the end
|
593
|
-
|
595
|
+
t_start = len(self.scheduler.timesteps) - num_inference_steps
|
596
|
+
timesteps = self.scheduler.timesteps[t_start:]
|
597
|
+
if hasattr(self.scheduler, "set_begin_index"):
|
598
|
+
self.scheduler.set_begin_index(t_start)
|
594
599
|
return timesteps, num_inference_steps
|
595
600
|
|
596
|
-
return timesteps, num_inference_steps - t_start
|
597
|
-
|
598
601
|
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img.StableDiffusionXLImg2ImgPipeline.prepare_latents
|
599
602
|
def prepare_latents(
|
600
603
|
self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
|
@@ -590,7 +590,7 @@ class GLMTransformer(torch.nn.Module):
|
|
590
590
|
if not kv_caches:
|
591
591
|
kv_caches = [None for _ in range(self.num_layers)]
|
592
592
|
presents = () if use_cache else None
|
593
|
-
if
|
593
|
+
if torch.is_grad_enabled() and self.gradient_checkpointing:
|
594
594
|
if use_cache:
|
595
595
|
logger.warning_once(
|
596
596
|
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
|
@@ -604,7 +604,7 @@ class GLMTransformer(torch.nn.Module):
|
|
604
604
|
all_hidden_states = all_hidden_states + (hidden_states,)
|
605
605
|
|
606
606
|
layer = self._get_layer(index)
|
607
|
-
if
|
607
|
+
if torch.is_grad_enabled() and self.gradient_checkpointing:
|
608
608
|
layer_ret = torch.utils.checkpoint.checkpoint(
|
609
609
|
layer, hidden_states, attention_mask, rotary_pos_emb, kv_caches[index], use_cache
|
610
610
|
)
|
@@ -277,6 +277,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
277
277
|
padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
|
278
278
|
pad_to_multiple_of: Optional[int] = None,
|
279
279
|
return_attention_mask: Optional[bool] = None,
|
280
|
+
padding_side: Optional[bool] = None,
|
280
281
|
) -> dict:
|
281
282
|
"""
|
282
283
|
Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
|
@@ -298,6 +299,9 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
298
299
|
pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
|
299
300
|
This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
|
300
301
|
`>= 7.5` (Volta).
|
302
|
+
padding_side (`str`, *optional*):
|
303
|
+
The side on which the model should have padding applied. Should be selected between ['right', 'left'].
|
304
|
+
Default value is picked from the class attribute of the same name.
|
301
305
|
return_attention_mask:
|
302
306
|
(optional) Set to False to avoid returning attention mask (default: set to model specifics)
|
303
307
|
"""
|
@@ -66,7 +66,7 @@ def retrieve_timesteps(
|
|
66
66
|
sigmas: Optional[List[float]] = None,
|
67
67
|
**kwargs,
|
68
68
|
):
|
69
|
-
"""
|
69
|
+
r"""
|
70
70
|
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
|
71
71
|
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
|
72
72
|
|
@@ -70,7 +70,7 @@ def retrieve_timesteps(
|
|
70
70
|
sigmas: Optional[List[float]] = None,
|
71
71
|
**kwargs,
|
72
72
|
):
|
73
|
-
"""
|
73
|
+
r"""
|
74
74
|
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
|
75
75
|
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
|
76
76
|
|
@@ -675,7 +675,7 @@ class LDMBertEncoder(LDMBertPreTrainedModel):
|
|
675
675
|
for idx, encoder_layer in enumerate(self.layers):
|
676
676
|
if output_hidden_states:
|
677
677
|
encoder_states = encoder_states + (hidden_states,)
|
678
|
-
if
|
678
|
+
if torch.is_grad_enabled() and self.gradient_checkpointing:
|
679
679
|
|
680
680
|
def create_custom_forward(module):
|
681
681
|
def custom_forward(*inputs):
|
@@ -56,7 +56,7 @@ EXAMPLE_DOC_STRING = """
|
|
56
56
|
>>> from diffusers.utils import export_to_gif
|
57
57
|
|
58
58
|
>>> # You can replace the checkpoint id with "maxin-cn/Latte-1" too.
|
59
|
-
>>> pipe = LattePipeline.from_pretrained("maxin-cn/Latte-1", torch_dtype=torch.float16)
|
59
|
+
>>> pipe = LattePipeline.from_pretrained("maxin-cn/Latte-1", torch_dtype=torch.float16)
|
60
60
|
>>> # Enable memory optimizations.
|
61
61
|
>>> pipe.enable_model_cpu_offload()
|
62
62
|
|
@@ -76,7 +76,7 @@ def retrieve_timesteps(
|
|
76
76
|
sigmas: Optional[List[float]] = None,
|
77
77
|
**kwargs,
|
78
78
|
):
|
79
|
-
"""
|
79
|
+
r"""
|
80
80
|
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
|
81
81
|
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
|
82
82
|
|
@@ -234,9 +234,21 @@ class LEDITSCrossAttnProcessor:
|
|
234
234
|
|
235
235
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
|
236
236
|
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
237
|
-
"""
|
238
|
-
|
239
|
-
|
237
|
+
r"""
|
238
|
+
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
239
|
+
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
240
|
+
Flawed](https://arxiv.org/pdf/2305.08891.pdf).
|
241
|
+
|
242
|
+
Args:
|
243
|
+
noise_cfg (`torch.Tensor`):
|
244
|
+
The predicted noise tensor for the guided diffusion process.
|
245
|
+
noise_pred_text (`torch.Tensor`):
|
246
|
+
The predicted noise tensor for the text-guided diffusion process.
|
247
|
+
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
248
|
+
A rescale factor applied to the noise predictions.
|
249
|
+
|
250
|
+
Returns:
|
251
|
+
noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
|
240
252
|
"""
|
241
253
|
std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
|
242
254
|
std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
|
@@ -1643,9 +1643,21 @@ class LEditsPPPipelineStableDiffusionXL(
|
|
1643
1643
|
|
1644
1644
|
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.rescale_noise_cfg
|
1645
1645
|
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
1646
|
-
"""
|
1647
|
-
|
1648
|
-
|
1646
|
+
r"""
|
1647
|
+
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
1648
|
+
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
1649
|
+
Flawed](https://arxiv.org/pdf/2305.08891.pdf).
|
1650
|
+
|
1651
|
+
Args:
|
1652
|
+
noise_cfg (`torch.Tensor`):
|
1653
|
+
The predicted noise tensor for the guided diffusion process.
|
1654
|
+
noise_pred_text (`torch.Tensor`):
|
1655
|
+
The predicted noise tensor for the text-guided diffusion process.
|
1656
|
+
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
1657
|
+
A rescale factor applied to the noise predictions.
|
1658
|
+
|
1659
|
+
Returns:
|
1660
|
+
noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
|
1649
1661
|
"""
|
1650
1662
|
std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
|
1651
1663
|
std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
|
@@ -0,0 +1,50 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
3
|
+
from ...utils import (
|
4
|
+
DIFFUSERS_SLOW_IMPORT,
|
5
|
+
OptionalDependencyNotAvailable,
|
6
|
+
_LazyModule,
|
7
|
+
get_objects_from_module,
|
8
|
+
is_torch_available,
|
9
|
+
is_transformers_available,
|
10
|
+
)
|
11
|
+
|
12
|
+
|
13
|
+
_dummy_objects = {}
|
14
|
+
_import_structure = {}
|
15
|
+
|
16
|
+
|
17
|
+
try:
|
18
|
+
if not (is_transformers_available() and is_torch_available()):
|
19
|
+
raise OptionalDependencyNotAvailable()
|
20
|
+
except OptionalDependencyNotAvailable:
|
21
|
+
from ...utils import dummy_torch_and_transformers_objects # noqa F403
|
22
|
+
|
23
|
+
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
|
24
|
+
else:
|
25
|
+
_import_structure["pipeline_ltx"] = ["LTXPipeline"]
|
26
|
+
_import_structure["pipeline_ltx_image2video"] = ["LTXImageToVideoPipeline"]
|
27
|
+
|
28
|
+
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
29
|
+
try:
|
30
|
+
if not (is_transformers_available() and is_torch_available()):
|
31
|
+
raise OptionalDependencyNotAvailable()
|
32
|
+
|
33
|
+
except OptionalDependencyNotAvailable:
|
34
|
+
from ...utils.dummy_torch_and_transformers_objects import *
|
35
|
+
else:
|
36
|
+
from .pipeline_ltx import LTXPipeline
|
37
|
+
from .pipeline_ltx_image2video import LTXImageToVideoPipeline
|
38
|
+
|
39
|
+
else:
|
40
|
+
import sys
|
41
|
+
|
42
|
+
sys.modules[__name__] = _LazyModule(
|
43
|
+
__name__,
|
44
|
+
globals()["__file__"],
|
45
|
+
_import_structure,
|
46
|
+
module_spec=__spec__,
|
47
|
+
)
|
48
|
+
|
49
|
+
for name, value in _dummy_objects.items():
|
50
|
+
setattr(sys.modules[__name__], name, value)
|