diffusers 0.30.3__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +97 -4
- diffusers/callbacks.py +56 -3
- diffusers/configuration_utils.py +13 -1
- diffusers/image_processor.py +282 -71
- diffusers/loaders/__init__.py +24 -3
- diffusers/loaders/ip_adapter.py +543 -16
- diffusers/loaders/lora_base.py +138 -125
- diffusers/loaders/lora_conversion_utils.py +647 -0
- diffusers/loaders/lora_pipeline.py +2216 -230
- diffusers/loaders/peft.py +380 -0
- diffusers/loaders/single_file_model.py +71 -4
- diffusers/loaders/single_file_utils.py +597 -10
- diffusers/loaders/textual_inversion.py +5 -3
- diffusers/loaders/transformer_flux.py +181 -0
- diffusers/loaders/transformer_sd3.py +89 -0
- diffusers/loaders/unet.py +56 -12
- diffusers/models/__init__.py +49 -12
- diffusers/models/activations.py +22 -9
- diffusers/models/adapter.py +53 -53
- diffusers/models/attention.py +98 -13
- diffusers/models/attention_flax.py +1 -1
- diffusers/models/attention_processor.py +2160 -346
- diffusers/models/autoencoders/__init__.py +5 -0
- diffusers/models/autoencoders/autoencoder_dc.py +620 -0
- diffusers/models/autoencoders/autoencoder_kl.py +73 -12
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +213 -105
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +3 -10
- diffusers/models/autoencoders/autoencoder_tiny.py +4 -2
- diffusers/models/autoencoders/vae.py +18 -5
- diffusers/models/controlnet.py +47 -802
- diffusers/models/controlnet_flux.py +70 -0
- diffusers/models/controlnet_sd3.py +26 -376
- diffusers/models/controlnet_sparsectrl.py +46 -719
- diffusers/models/controlnets/__init__.py +23 -0
- diffusers/models/controlnets/controlnet.py +872 -0
- diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +5 -5
- diffusers/models/controlnets/controlnet_flux.py +536 -0
- diffusers/models/{controlnet_hunyuan.py → controlnets/controlnet_hunyuan.py} +7 -7
- diffusers/models/controlnets/controlnet_sd3.py +489 -0
- diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
- diffusers/models/controlnets/controlnet_union.py +832 -0
- diffusers/models/{controlnet_xs.py → controlnets/controlnet_xs.py} +14 -13
- diffusers/models/controlnets/multicontrolnet.py +183 -0
- diffusers/models/embeddings.py +996 -92
- diffusers/models/embeddings_flax.py +23 -9
- diffusers/models/model_loading_utils.py +264 -14
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +334 -51
- diffusers/models/normalization.py +157 -13
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +3 -2
- diffusers/models/transformers/cogvideox_transformer_3d.py +69 -13
- diffusers/models/transformers/dit_transformer_2d.py +1 -1
- diffusers/models/transformers/latte_transformer_3d.py +4 -4
- diffusers/models/transformers/pixart_transformer_2d.py +10 -2
- diffusers/models/transformers/sana_transformer.py +488 -0
- diffusers/models/transformers/stable_audio_transformer.py +1 -1
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +422 -0
- diffusers/models/transformers/transformer_cogview3plus.py +386 -0
- diffusers/models/transformers/transformer_flux.py +189 -51
- diffusers/models/transformers/transformer_hunyuan_video.py +789 -0
- diffusers/models/transformers/transformer_ltx.py +469 -0
- diffusers/models/transformers/transformer_mochi.py +499 -0
- diffusers/models/transformers/transformer_sd3.py +112 -18
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +8 -1
- diffusers/models/unets/unet_2d_blocks.py +88 -21
- diffusers/models/unets/unet_2d_condition.py +9 -9
- diffusers/models/unets/unet_3d_blocks.py +9 -7
- diffusers/models/unets/unet_motion_model.py +46 -68
- diffusers/models/unets/unet_spatio_temporal_condition.py +23 -0
- diffusers/models/unets/unet_stable_cascade.py +2 -2
- diffusers/models/unets/uvit_2d.py +1 -1
- diffusers/models/upsampling.py +14 -6
- diffusers/pipelines/__init__.py +69 -6
- diffusers/pipelines/allegro/__init__.py +48 -0
- diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
- diffusers/pipelines/allegro/pipeline_output.py +23 -0
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +52 -22
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +3 -1
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -72
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +3 -3
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +2 -9
- diffusers/pipelines/auto_pipeline.py +88 -10
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/cogvideo/__init__.py +2 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +80 -39
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +825 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +108 -50
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +89 -50
- diffusers/pipelines/cogview3/__init__.py +47 -0
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
- diffusers/pipelines/cogview3/pipeline_output.py +21 -0
- diffusers/pipelines/controlnet/__init__.py +86 -80
- diffusers/pipelines/controlnet/multicontrolnet.py +7 -178
- diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +9 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +9 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +37 -15
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +12 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +9 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +22 -4
- diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +56 -20
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
- diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +32 -9
- diffusers/pipelines/flux/__init__.py +23 -1
- diffusers/pipelines/flux/modeling_flux.py +47 -0
- diffusers/pipelines/flux/pipeline_flux.py +256 -48
- diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +1006 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +998 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1204 -0
- diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
- diffusers/pipelines/flux/pipeline_flux_img2img.py +856 -0
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +1022 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
- diffusers/pipelines/flux/pipeline_output.py +16 -0
- diffusers/pipelines/free_noise_utils.py +365 -5
- diffusers/pipelines/hunyuan_video/__init__.py +48 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
- diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +20 -4
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +9 -9
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
- diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
- diffusers/pipelines/kolors/text_encoder.py +2 -2
- diffusers/pipelines/kolors/tokenizer.py +4 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latte/pipeline_latte.py +2 -2
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
- diffusers/pipelines/ltx/__init__.py +50 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
- diffusers/pipelines/ltx/pipeline_output.py +20 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +3 -10
- diffusers/pipelines/mochi/__init__.py +48 -0
- diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
- diffusers/pipelines/mochi/pipeline_output.py +20 -0
- diffusers/pipelines/pag/__init__.py +13 -0
- diffusers/pipelines/pag/pag_utils.py +8 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +2 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1543 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1683 -0
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +22 -6
- diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +7 -14
- diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
- diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +18 -9
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1094 -0
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
- diffusers/pipelines/pia/pipeline_pia.py +2 -0
- diffusers/pipelines/pipeline_flax_utils.py +1 -1
- diffusers/pipelines/pipeline_loading_utils.py +250 -31
- diffusers/pipelines/pipeline_utils.py +158 -186
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +7 -14
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +7 -14
- diffusers/pipelines/sana/__init__.py +47 -0
- diffusers/pipelines/sana/pipeline_output.py +21 -0
- diffusers/pipelines/sana/pipeline_sana.py +884 -0
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +12 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +46 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +228 -23
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +82 -13
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +60 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -1
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -22
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -22
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
- diffusers/pipelines/unidiffuser/modeling_uvit.py +2 -2
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/quantizers/__init__.py +16 -0
- diffusers/quantizers/auto.py +139 -0
- diffusers/quantizers/base.py +233 -0
- diffusers/quantizers/bitsandbytes/__init__.py +2 -0
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +561 -0
- diffusers/quantizers/bitsandbytes/utils.py +306 -0
- diffusers/quantizers/gguf/__init__.py +1 -0
- diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
- diffusers/quantizers/gguf/utils.py +456 -0
- diffusers/quantizers/quantization_config.py +669 -0
- diffusers/quantizers/torchao/__init__.py +15 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +285 -0
- diffusers/schedulers/scheduling_ddim.py +4 -1
- diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
- diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
- diffusers/schedulers/scheduling_ddpm.py +6 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +6 -7
- diffusers/schedulers/scheduling_deis_multistep.py +102 -6
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +113 -6
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +111 -5
- diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +126 -7
- diffusers/schedulers/scheduling_edm_euler.py +8 -6
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
- diffusers/schedulers/scheduling_euler_discrete.py +92 -7
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +153 -6
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
- diffusers/schedulers/scheduling_heun_discrete.py +114 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
- diffusers/schedulers/scheduling_lcm.py +2 -6
- diffusers/schedulers/scheduling_lms_discrete.py +76 -1
- diffusers/schedulers/scheduling_repaint.py +1 -1
- diffusers/schedulers/scheduling_sasolver.py +102 -6
- diffusers/schedulers/scheduling_tcd.py +2 -6
- diffusers/schedulers/scheduling_unclip.py +4 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +127 -5
- diffusers/training_utils.py +63 -19
- diffusers/utils/__init__.py +7 -1
- diffusers/utils/constants.py +1 -0
- diffusers/utils/dummy_pt_objects.py +240 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +435 -0
- diffusers/utils/dynamic_modules_utils.py +3 -3
- diffusers/utils/hub_utils.py +44 -40
- diffusers/utils/import_utils.py +98 -8
- diffusers/utils/loading_utils.py +28 -4
- diffusers/utils/peft_utils.py +6 -3
- diffusers/utils/testing_utils.py +115 -1
- diffusers/utils/torch_utils.py +3 -0
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/METADATA +73 -72
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/RECORD +268 -193
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/WHEEL +1 -1
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/LICENSE +0 -0
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/top_level.txt +0 -0
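The largest structural addition in this release is the new diffusers/quantizers package: a quantizer base class plus bitsandbytes, GGUF, and torchao backends and a shared quantization_config module. A minimal sketch of how the new entry point is meant to compose, assuming the BitsAndBytesConfig arguments mirror the transformers-style API (checkpoint id and subfolder are illustrative):

import torch
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel

# 4-bit weights with fp16 compute; argument names assumed to follow the
# transformers-style BitsAndBytesConfig.
quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
transformer = SD3Transformer2DModel.from_pretrained(
    "stabilityai/stable-diffusion-3.5-large",  # illustrative checkpoint
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

The quantized component can then be handed to a pipeline's from_pretrained, keeping the rest of the pipeline in higher precision.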
diffusers/models/unets/unet_motion_model.py CHANGED

@@ -116,7 +116,7 @@ class AnimateDiffTransformer3D(nn.Module):
 
         self.in_channels = in_channels
 
-        self.norm = torch.nn.GroupNorm(num_groups=norm_num_groups, num_channels=in_channels, eps=1e-6, affine=True)
+        self.norm = nn.GroupNorm(num_groups=norm_num_groups, num_channels=in_channels, eps=1e-6, affine=True)
         self.proj_in = nn.Linear(in_channels, inner_dim)
 
         # 3. Define transformers blocks
@@ -187,12 +187,12 @@ class AnimateDiffTransformer3D(nn.Module):
         hidden_states = self.norm(hidden_states)
         hidden_states = hidden_states.permute(0, 3, 4, 2, 1).reshape(batch_size * height * width, num_frames, channel)
 
-        hidden_states = self.proj_in(hidden_states)
+        hidden_states = self.proj_in(input=hidden_states)
 
         # 2. Blocks
         for block in self.transformer_blocks:
             hidden_states = block(
-                hidden_states,
+                hidden_states=hidden_states,
                 encoder_hidden_states=encoder_hidden_states,
                 timestep=timestep,
                 cross_attention_kwargs=cross_attention_kwargs,
@@ -200,7 +200,7 @@ class AnimateDiffTransformer3D(nn.Module):
             )
 
         # 3. Output
-        hidden_states = self.proj_out(hidden_states)
+        hidden_states = self.proj_out(input=hidden_states)
         hidden_states = (
             hidden_states[None, None, :]
             .reshape(batch_size, height, width, num_frames, channel)
@@ -323,7 +323,7 @@ class DownBlockMotion(nn.Module):
 
         blocks = zip(self.resnets, self.motion_modules)
         for resnet, motion_module in blocks:
-            if self.training and self.gradient_checkpointing:
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
@@ -344,7 +344,7 @@ class DownBlockMotion(nn.Module):
                 )
 
             else:
-                hidden_states = resnet(hidden_states, temb)
+                hidden_states = resnet(input_tensor=hidden_states, temb=temb)
 
             hidden_states = motion_module(hidden_states, num_frames=num_frames)
 
@@ -352,7 +352,7 @@ class DownBlockMotion(nn.Module):
 
         if self.downsamplers is not None:
             for downsampler in self.downsamplers:
-                hidden_states = downsampler(hidden_states)
+                hidden_states = downsampler(hidden_states=hidden_states)
 
             output_states = output_states + (hidden_states,)
 
@@ -513,7 +513,7 @@ class CrossAttnDownBlockMotion(nn.Module):
 
         blocks = list(zip(self.resnets, self.attentions, self.motion_modules))
         for i, (resnet, attn, motion_module) in enumerate(blocks):
-            if self.training and self.gradient_checkpointing:
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                 def create_custom_forward(module, return_dict=None):
                     def custom_forward(*inputs):
@@ -531,25 +531,18 @@ class CrossAttnDownBlockMotion(nn.Module):
                     temb,
                     **ckpt_kwargs,
                 )
-                hidden_states = attn(
-                    hidden_states,
-                    encoder_hidden_states=encoder_hidden_states,
-                    cross_attention_kwargs=cross_attention_kwargs,
-                    attention_mask=attention_mask,
-                    encoder_attention_mask=encoder_attention_mask,
-                    return_dict=False,
-                )[0]
             else:
-                hidden_states = resnet(hidden_states, temb)
+                hidden_states = resnet(input_tensor=hidden_states, temb=temb)
+
+            hidden_states = attn(
+                hidden_states=hidden_states,
+                encoder_hidden_states=encoder_hidden_states,
+                cross_attention_kwargs=cross_attention_kwargs,
+                attention_mask=attention_mask,
+                encoder_attention_mask=encoder_attention_mask,
+                return_dict=False,
+            )[0]
 
-            hidden_states = attn(
-                hidden_states,
-                encoder_hidden_states=encoder_hidden_states,
-                cross_attention_kwargs=cross_attention_kwargs,
-                attention_mask=attention_mask,
-                encoder_attention_mask=encoder_attention_mask,
-                return_dict=False,
-            )[0]
             hidden_states = motion_module(
                 hidden_states,
                 num_frames=num_frames,
@@ -563,7 +556,7 @@ class CrossAttnDownBlockMotion(nn.Module):
 
         if self.downsamplers is not None:
             for downsampler in self.downsamplers:
-                hidden_states = downsampler(hidden_states)
+                hidden_states = downsampler(hidden_states=hidden_states)
 
             output_states = output_states + (hidden_states,)
 
@@ -739,7 +732,7 @@ class CrossAttnUpBlockMotion(nn.Module):
 
             hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
 
-            if self.training and self.gradient_checkpointing:
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                 def create_custom_forward(module, return_dict=None):
                     def custom_forward(*inputs):
@@ -757,25 +750,18 @@ class CrossAttnUpBlockMotion(nn.Module):
                     temb,
                     **ckpt_kwargs,
                 )
-                hidden_states = attn(
-                    hidden_states,
-                    encoder_hidden_states=encoder_hidden_states,
-                    cross_attention_kwargs=cross_attention_kwargs,
-                    attention_mask=attention_mask,
-                    encoder_attention_mask=encoder_attention_mask,
-                    return_dict=False,
-                )[0]
             else:
-                hidden_states = resnet(hidden_states, temb)
+                hidden_states = resnet(input_tensor=hidden_states, temb=temb)
+
+            hidden_states = attn(
+                hidden_states=hidden_states,
+                encoder_hidden_states=encoder_hidden_states,
+                cross_attention_kwargs=cross_attention_kwargs,
+                attention_mask=attention_mask,
+                encoder_attention_mask=encoder_attention_mask,
+                return_dict=False,
+            )[0]
 
-            hidden_states = attn(
-                hidden_states,
-                encoder_hidden_states=encoder_hidden_states,
-                cross_attention_kwargs=cross_attention_kwargs,
-                attention_mask=attention_mask,
-                encoder_attention_mask=encoder_attention_mask,
-                return_dict=False,
-            )[0]
             hidden_states = motion_module(
                 hidden_states,
                 num_frames=num_frames,
@@ -783,7 +769,7 @@ class CrossAttnUpBlockMotion(nn.Module):
 
         if self.upsamplers is not None:
             for upsampler in self.upsamplers:
-                hidden_states = upsampler(hidden_states, upsample_size)
+                hidden_states = upsampler(hidden_states=hidden_states, output_size=upsample_size)
 
         return hidden_states
 
@@ -909,7 +895,7 @@ class UpBlockMotion(nn.Module):
 
             hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
 
-            if self.training and self.gradient_checkpointing:
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
@@ -929,13 +915,13 @@ class UpBlockMotion(nn.Module):
                     create_custom_forward(resnet), hidden_states, temb
                 )
             else:
-                hidden_states = resnet(hidden_states, temb)
+                hidden_states = resnet(input_tensor=hidden_states, temb=temb)
 
             hidden_states = motion_module(hidden_states, num_frames=num_frames)
 
         if self.upsamplers is not None:
             for upsampler in self.upsamplers:
-                hidden_states = upsampler(hidden_states, upsample_size)
+                hidden_states = upsampler(hidden_states=hidden_states, output_size=upsample_size)
 
         return hidden_states
 
@@ -1080,11 +1066,20 @@ class UNetMidBlockCrossAttnMotion(nn.Module):
         if cross_attention_kwargs.get("scale", None) is not None:
             logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
 
-        hidden_states = self.resnets[0](hidden_states, temb)
+        hidden_states = self.resnets[0](input_tensor=hidden_states, temb=temb)
 
         blocks = zip(self.attentions, self.resnets[1:], self.motion_modules)
         for attn, resnet, motion_module in blocks:
-            if self.training and self.gradient_checkpointing:
+            hidden_states = attn(
+                hidden_states=hidden_states,
+                encoder_hidden_states=encoder_hidden_states,
+                cross_attention_kwargs=cross_attention_kwargs,
+                attention_mask=attention_mask,
+                encoder_attention_mask=encoder_attention_mask,
+                return_dict=False,
+            )[0]
+
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                 def create_custom_forward(module, return_dict=None):
                     def custom_forward(*inputs):
@@ -1096,14 +1091,6 @@ class UNetMidBlockCrossAttnMotion(nn.Module):
                     return custom_forward
 
                 ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
-                hidden_states = attn(
-                    hidden_states,
-                    encoder_hidden_states=encoder_hidden_states,
-                    cross_attention_kwargs=cross_attention_kwargs,
-                    attention_mask=attention_mask,
-                    encoder_attention_mask=encoder_attention_mask,
-                    return_dict=False,
-                )[0]
                 hidden_states = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(motion_module),
                     hidden_states,
@@ -1117,19 +1104,11 @@ class UNetMidBlockCrossAttnMotion(nn.Module):
                     **ckpt_kwargs,
                 )
             else:
-                hidden_states = attn(
-                    hidden_states,
-                    encoder_hidden_states=encoder_hidden_states,
-                    cross_attention_kwargs=cross_attention_kwargs,
-                    attention_mask=attention_mask,
-                    encoder_attention_mask=encoder_attention_mask,
-                    return_dict=False,
-                )[0]
                 hidden_states = motion_module(
                     hidden_states,
                     num_frames=num_frames,
                 )
-            hidden_states = resnet(hidden_states, temb)
+            hidden_states = resnet(input_tensor=hidden_states, temb=temb)
 
         return hidden_states
 
@@ -2178,7 +2157,6 @@ class UNetMotionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin, Peft
 
         emb = emb if aug_emb is None else emb + aug_emb
         emb = emb.repeat_interleave(repeats=num_frames, dim=0)
-        encoder_hidden_states = encoder_hidden_states.repeat_interleave(repeats=num_frames, dim=0)
 
         if self.encoder_hid_proj is not None and self.config.encoder_hid_dim_type == "ip_image_proj":
             if "image_embeds" not in added_cond_kwargs:
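Two edits repeat across every motion block above: positional calls become keyword calls (resnet(input_tensor=..., temb=...), downsampler(hidden_states=...), upsampler(..., output_size=...)), and the checkpointing guard changes from self.training to torch.is_grad_enabled(), so activation checkpointing is skipped whenever autograd is not recording, even for a module left in train() mode. A self-contained toy sketch of the new guard (not the diffusers class):

import torch
import torch.nn as nn


class ToyBlock(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(8, 8)
        self.gradient_checkpointing = True

    def forward(self, x):
        # The old guard (`self.training and ...`) would still checkpoint inside
        # torch.no_grad(); the new guard keys off whether autograd is recording.
        if torch.is_grad_enabled() and self.gradient_checkpointing:
            return torch.utils.checkpoint.checkpoint(self.layer, x, use_reentrant=False)
        return self.layer(x)


block = ToyBlock().train()
with torch.no_grad():
    out = block(torch.randn(2, 8))  # takes the plain path despite train() mode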
diffusers/models/unets/unet_spatio_temporal_condition.py CHANGED

@@ -382,6 +382,20 @@ class UNetSpatioTemporalConditionModel(ModelMixin, ConfigMixin, UNet2DConditionL
             If `return_dict` is True, an [`~models.unet_slatio_temporal.UNetSpatioTemporalConditionOutput`] is
             returned, otherwise a `tuple` is returned where the first element is the sample tensor.
         """
+        # By default samples have to be AT least a multiple of the overall upsampling factor.
+        # The overall upsampling factor is equal to 2 ** (# num of upsampling layears).
+        # However, the upsampling interpolation output size can be forced to fit any upsampling size
+        # on the fly if necessary.
+        default_overall_up_factor = 2**self.num_upsamplers
+
+        # upsample size should be forwarded when sample is not a multiple of `default_overall_up_factor`
+        forward_upsample_size = False
+        upsample_size = None
+
+        if any(s % default_overall_up_factor != 0 for s in sample.shape[-2:]):
+            logger.info("Forward upsample size to force interpolation output size.")
+            forward_upsample_size = True
+
         # 1. time
         timesteps = timestep
         if not torch.is_tensor(timesteps):
@@ -457,15 +471,23 @@ class UNetSpatioTemporalConditionModel(ModelMixin, ConfigMixin, UNet2DConditionL
 
         # 5. up
         for i, upsample_block in enumerate(self.up_blocks):
+            is_final_block = i == len(self.up_blocks) - 1
+
             res_samples = down_block_res_samples[-len(upsample_block.resnets) :]
             down_block_res_samples = down_block_res_samples[: -len(upsample_block.resnets)]
 
+            # if we have not reached the final block and need to forward the
+            # upsample size, we do it here
+            if not is_final_block and forward_upsample_size:
+                upsample_size = down_block_res_samples[-1].shape[2:]
+
             if hasattr(upsample_block, "has_cross_attention") and upsample_block.has_cross_attention:
                 sample = upsample_block(
                     hidden_states=sample,
                     temb=emb,
                     res_hidden_states_tuple=res_samples,
                     encoder_hidden_states=encoder_hidden_states,
+                    upsample_size=upsample_size,
                     image_only_indicator=image_only_indicator,
                 )
             else:
@@ -473,6 +495,7 @@ class UNetSpatioTemporalConditionModel(ModelMixin, ConfigMixin, UNet2DConditionL
                     hidden_states=sample,
                     temb=emb,
                     res_hidden_states_tuple=res_samples,
+                    upsample_size=upsample_size,
                     image_only_indicator=image_only_indicator,
                 )
 
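This brings UNetSpatioTemporalConditionModel in line with the 2D condition UNet: when the latent's spatial size is not divisible by 2**num_upsamplers, each non-final up-block is handed the skip connection's spatial size so interpolation lands on the right resolution. A toy illustration of the divisibility check (num_upsamplers=3 is an assumption):

import torch

sample = torch.randn(1, 25, 8, 72, 100)  # hypothetical (batch, frames, channels, H, W) latent
default_overall_up_factor = 2**3
forward_upsample_size = any(s % default_overall_up_factor != 0 for s in sample.shape[-2:])
print(forward_upsample_size)  # True: 100 % 8 != 0, so up-blocks receive an explicit output size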
diffusers/models/unets/unet_stable_cascade.py CHANGED

@@ -455,7 +455,7 @@ class StableCascadeUNet(ModelMixin, ConfigMixin, FromOriginalModelMixin):
         level_outputs = []
         block_group = zip(self.down_blocks, self.down_downscalers, self.down_repeat_mappers)
 
-        if self.training and self.gradient_checkpointing:
+        if torch.is_grad_enabled() and self.gradient_checkpointing:
 
             def create_custom_forward(module):
                 def custom_forward(*inputs):
@@ -504,7 +504,7 @@ class StableCascadeUNet(ModelMixin, ConfigMixin, FromOriginalModelMixin):
         x = level_outputs[0]
         block_group = zip(self.up_blocks, self.up_upscalers, self.up_repeat_mappers)
 
-        if self.training and self.gradient_checkpointing:
+        if torch.is_grad_enabled() and self.gradient_checkpointing:
 
             def create_custom_forward(module):
                 def custom_forward(*inputs):
diffusers/models/unets/uvit_2d.py CHANGED

@@ -181,7 +181,7 @@ class UVit2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
         hidden_states = self.project_to_hidden(hidden_states)
 
         for layer in self.transformer_layers:
-            if self.training and self.gradient_checkpointing:
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                 def layer_(*args):
                     return checkpoint(layer, *args)
diffusers/models/upsampling.py CHANGED

@@ -19,6 +19,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 
 from ..utils import deprecate
+from ..utils.import_utils import is_torch_version
 from .normalization import RMSNorm
 
 
@@ -151,11 +152,10 @@ class Upsample2D(nn.Module):
         if self.use_conv_transpose:
             return self.conv(hidden_states)
 
-        # Cast to float32 to as 'upsample_nearest2d_out_frame' op does not support bfloat16
-        #
-        # https://github.com/pytorch/pytorch/issues/86679
+        # Cast to float32 to as 'upsample_nearest2d_out_frame' op does not support bfloat16 until PyTorch 2.1
+        # https://github.com/pytorch/pytorch/issues/86679#issuecomment-1783978767
         dtype = hidden_states.dtype
-        if dtype == torch.bfloat16:
+        if dtype == torch.bfloat16 and is_torch_version("<", "2.1"):
             hidden_states = hidden_states.to(torch.float32)
 
         # upsample_nearest_nhwc fails with large batch sizes. see https://github.com/huggingface/diffusers/issues/984
@@ -165,13 +165,21 @@ class Upsample2D(nn.Module):
         # if `output_size` is passed we force the interpolation output
         # size and do not make use of `scale_factor=2`
         if self.interpolate:
+            # upsample_nearest_nhwc also fails when the number of output elements is large
+            # https://github.com/pytorch/pytorch/issues/141831
+            scale_factor = (
+                2 if output_size is None else max([f / s for f, s in zip(output_size, hidden_states.shape[-2:])])
+            )
+            if hidden_states.numel() * scale_factor > pow(2, 31):
+                hidden_states = hidden_states.contiguous()
+
             if output_size is None:
                 hidden_states = F.interpolate(hidden_states, scale_factor=2.0, mode="nearest")
             else:
                 hidden_states = F.interpolate(hidden_states, size=output_size, mode="nearest")
 
-        #
-        if dtype == torch.bfloat16:
+        # Cast back to original dtype
+        if dtype == torch.bfloat16 and is_torch_version("<", "2.1"):
             hidden_states = hidden_states.to(dtype)
 
         # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed
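Both workarounds in Upsample2D.forward are narrow guards around F.interpolate. A standalone sketch of the combined logic, using packaging for the version check instead of the library's is_torch_version helper:

import torch
import torch.nn.functional as F
from packaging import version

_OLD_TORCH = version.parse(torch.__version__.split("+")[0]) < version.parse("2.1")


def upsample_nearest_2x(x: torch.Tensor) -> torch.Tensor:
    dtype = x.dtype
    # bf16 nearest-neighbor upsampling was unsupported before PyTorch 2.1
    # (pytorch#86679), so the fp32 round-trip is now version-gated.
    if dtype == torch.bfloat16 and _OLD_TORCH:
        x = x.to(torch.float32)
    # The channels-last kernel can fail once the output exceeds 2**31 elements
    # (pytorch#141831); forcing contiguous memory sidesteps it.
    if x.numel() * 2 > 2**31:
        x = x.contiguous()
    x = F.interpolate(x, scale_factor=2.0, mode="nearest")
    if dtype == torch.bfloat16 and _OLD_TORCH:
        x = x.to(dtype)
    return x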
diffusers/pipelines/__init__.py CHANGED

@@ -116,6 +116,7 @@ else:
             "VersatileDiffusionTextToImagePipeline",
         ]
     )
+    _import_structure["allegro"] = ["AllegroPipeline"]
     _import_structure["amused"] = ["AmusedImg2ImgPipeline", "AmusedInpaintPipeline", "AmusedPipeline"]
     _import_structure["animatediff"] = [
         "AnimateDiffPipeline",
@@ -123,8 +124,22 @@ else:
         "AnimateDiffSDXLPipeline",
         "AnimateDiffSparseControlNetPipeline",
         "AnimateDiffVideoToVideoPipeline",
+        "AnimateDiffVideoToVideoControlNetPipeline",
+    ]
+    _import_structure["flux"] = [
+        "FluxControlPipeline",
+        "FluxControlInpaintPipeline",
+        "FluxControlImg2ImgPipeline",
+        "FluxControlNetPipeline",
+        "FluxControlNetImg2ImgPipeline",
+        "FluxControlNetInpaintPipeline",
+        "FluxImg2ImgPipeline",
+        "FluxInpaintPipeline",
+        "FluxPipeline",
+        "FluxFillPipeline",
+        "FluxPriorReduxPipeline",
+        "ReduxImageEncoder",
     ]
-    _import_structure["flux"] = ["FluxPipeline"]
     _import_structure["audioldm"] = ["AudioLDMPipeline"]
     _import_structure["audioldm2"] = [
         "AudioLDM2Pipeline",
@@ -136,7 +151,9 @@ else:
         "CogVideoXPipeline",
         "CogVideoXImageToVideoPipeline",
         "CogVideoXVideoToVideoPipeline",
+        "CogVideoXFunControlPipeline",
     ]
+    _import_structure["cogview3"] = ["CogView3PlusPipeline"]
     _import_structure["controlnet"].extend(
         [
             "BlipDiffusionControlNetPipeline",
@@ -146,21 +163,30 @@ else:
             "StableDiffusionXLControlNetImg2ImgPipeline",
             "StableDiffusionXLControlNetInpaintPipeline",
             "StableDiffusionXLControlNetPipeline",
+            "StableDiffusionXLControlNetUnionPipeline",
+            "StableDiffusionXLControlNetUnionInpaintPipeline",
+            "StableDiffusionXLControlNetUnionImg2ImgPipeline",
         ]
     )
     _import_structure["pag"].extend(
         [
+            "StableDiffusionControlNetPAGInpaintPipeline",
             "AnimateDiffPAGPipeline",
             "KolorsPAGPipeline",
             "HunyuanDiTPAGPipeline",
             "StableDiffusion3PAGPipeline",
+            "StableDiffusion3PAGImg2ImgPipeline",
             "StableDiffusionPAGPipeline",
+            "StableDiffusionPAGImg2ImgPipeline",
+            "StableDiffusionPAGInpaintPipeline",
             "StableDiffusionControlNetPAGPipeline",
             "StableDiffusionXLPAGPipeline",
             "StableDiffusionXLPAGInpaintPipeline",
+            "StableDiffusionXLControlNetPAGImg2ImgPipeline",
             "StableDiffusionXLControlNetPAGPipeline",
             "StableDiffusionXLPAGImg2ImgPipeline",
             "PixArtSigmaPAGPipeline",
+            "SanaPAGPipeline",
         ]
     )
     _import_structure["controlnet_xs"].extend(
@@ -177,6 +203,7 @@ else:
     _import_structure["controlnet_sd3"].extend(
         [
             "StableDiffusion3ControlNetPipeline",
+            "StableDiffusion3ControlNetInpaintingPipeline",
         ]
     )
     _import_structure["deepfloyd_if"] = [
@@ -188,6 +215,7 @@ else:
         "IFSuperResolutionPipeline",
     ]
     _import_structure["hunyuandit"] = ["HunyuanDiTPipeline"]
+    _import_structure["hunyuan_video"] = ["HunyuanVideoPipeline"]
     _import_structure["kandinsky"] = [
         "KandinskyCombinedPipeline",
         "KandinskyImg2ImgCombinedPipeline",
@@ -225,6 +253,7 @@ else:
         ]
     )
     _import_structure["latte"] = ["LattePipeline"]
+    _import_structure["ltx"] = ["LTXPipeline", "LTXImageToVideoPipeline"]
     _import_structure["lumina"] = ["LuminaText2ImgPipeline"]
     _import_structure["marigold"].extend(
         [
@@ -232,10 +261,12 @@ else:
             "MarigoldNormalsPipeline",
         ]
     )
+    _import_structure["mochi"] = ["MochiPipeline"]
     _import_structure["musicldm"] = ["MusicLDMPipeline"]
     _import_structure["paint_by_example"] = ["PaintByExamplePipeline"]
     _import_structure["pia"] = ["PIAPipeline"]
     _import_structure["pixart_alpha"] = ["PixArtAlphaPipeline", "PixArtSigmaPipeline"]
+    _import_structure["sana"] = ["SanaPipeline"]
     _import_structure["semantic_stable_diffusion"] = ["SemanticStableDiffusionPipeline"]
     _import_structure["shap_e"] = ["ShapEImg2ImgPipeline", "ShapEPipeline"]
     _import_structure["stable_audio"] = [
@@ -440,12 +471,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     except OptionalDependencyNotAvailable:
         from ..utils.dummy_torch_and_transformers_objects import *
     else:
+        from .allegro import AllegroPipeline
         from .amused import AmusedImg2ImgPipeline, AmusedInpaintPipeline, AmusedPipeline
         from .animatediff import (
             AnimateDiffControlNetPipeline,
             AnimateDiffPipeline,
             AnimateDiffSDXLPipeline,
             AnimateDiffSparseControlNetPipeline,
+            AnimateDiffVideoToVideoControlNetPipeline,
             AnimateDiffVideoToVideoPipeline,
         )
         from .audioldm import AudioLDMPipeline
@@ -456,7 +489,13 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         )
         from .aura_flow import AuraFlowPipeline
         from .blip_diffusion import BlipDiffusionPipeline
-        from .cogvideo import CogVideoXImageToVideoPipeline, CogVideoXPipeline, CogVideoXVideoToVideoPipeline
+        from .cogvideo import (
+            CogVideoXFunControlPipeline,
+            CogVideoXImageToVideoPipeline,
+            CogVideoXPipeline,
+            CogVideoXVideoToVideoPipeline,
+        )
+        from .cogview3 import CogView3PlusPipeline
         from .controlnet import (
             BlipDiffusionControlNetPipeline,
             StableDiffusionControlNetImg2ImgPipeline,
@@ -465,13 +504,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             StableDiffusionXLControlNetImg2ImgPipeline,
             StableDiffusionXLControlNetInpaintPipeline,
             StableDiffusionXLControlNetPipeline,
+            StableDiffusionXLControlNetUnionImg2ImgPipeline,
+            StableDiffusionXLControlNetUnionInpaintPipeline,
+            StableDiffusionXLControlNetUnionPipeline,
         )
         from .controlnet_hunyuandit import (
             HunyuanDiTControlNetPipeline,
         )
-        from .controlnet_sd3 import (
-            StableDiffusion3ControlNetPipeline,
-        )
+        from .controlnet_sd3 import StableDiffusion3ControlNetInpaintingPipeline, StableDiffusion3ControlNetPipeline
         from .controlnet_xs import (
             StableDiffusionControlNetXSPipeline,
             StableDiffusionXLControlNetXSPipeline,
@@ -498,7 +538,21 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             VersatileDiffusionTextToImagePipeline,
             VQDiffusionPipeline,
         )
-        from .flux import FluxPipeline
+        from .flux import (
+            FluxControlImg2ImgPipeline,
+            FluxControlInpaintPipeline,
+            FluxControlNetImg2ImgPipeline,
+            FluxControlNetInpaintPipeline,
+            FluxControlNetPipeline,
+            FluxControlPipeline,
+            FluxFillPipeline,
+            FluxImg2ImgPipeline,
+            FluxInpaintPipeline,
+            FluxPipeline,
+            FluxPriorReduxPipeline,
+            ReduxImageEncoder,
+        )
+        from .hunyuan_video import HunyuanVideoPipeline
         from .hunyuandit import HunyuanDiTPipeline
         from .i2vgen_xl import I2VGenXLPipeline
         from .kandinsky import (
@@ -538,20 +592,28 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             LEditsPPPipelineStableDiffusion,
             LEditsPPPipelineStableDiffusionXL,
         )
+        from .ltx import LTXImageToVideoPipeline, LTXPipeline
         from .lumina import LuminaText2ImgPipeline
         from .marigold import (
             MarigoldDepthPipeline,
             MarigoldNormalsPipeline,
         )
+        from .mochi import MochiPipeline
         from .musicldm import MusicLDMPipeline
         from .pag import (
             AnimateDiffPAGPipeline,
             HunyuanDiTPAGPipeline,
             KolorsPAGPipeline,
             PixArtSigmaPAGPipeline,
+            SanaPAGPipeline,
+            StableDiffusion3PAGImg2ImgPipeline,
             StableDiffusion3PAGPipeline,
+            StableDiffusionControlNetPAGInpaintPipeline,
             StableDiffusionControlNetPAGPipeline,
+            StableDiffusionPAGImg2ImgPipeline,
+            StableDiffusionPAGInpaintPipeline,
             StableDiffusionPAGPipeline,
+            StableDiffusionXLControlNetPAGImg2ImgPipeline,
             StableDiffusionXLControlNetPAGPipeline,
             StableDiffusionXLPAGImg2ImgPipeline,
             StableDiffusionXLPAGInpaintPipeline,
@@ -560,6 +622,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .paint_by_example import PaintByExamplePipeline
         from .pia import PIAPipeline
         from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline
+        from .sana import SanaPipeline
         from .semantic_stable_diffusion import SemanticStableDiffusionPipeline
         from .shap_e import ShapEImg2ImgPipeline, ShapEPipeline
         from .stable_audio import StableAudioPipeline, StableAudioProjectionModel
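Most of the churn in pipelines/__init__.py is re-exports for the new pipeline families (the Flux variants, CogView3, HunyuanVideo, LTX, Mochi, Sana, and the SDXL ControlNet-Union set), all of which resolve from the top-level namespace once the wheel is installed. A hedged sketch with one of the new Flux exports (checkpoint id illustrative):

import torch
from diffusers import FluxFillPipeline  # newly exported in 0.32.0
from diffusers.utils import load_image

pipe = FluxFillPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16  # illustrative checkpoint
)
pipe.enable_model_cpu_offload()

image = load_image("input.png")  # region to repaint is white in the mask
mask = load_image("mask.png")
result = pipe(prompt="a red sofa", image=image, mask_image=mask, num_inference_steps=28).images[0]
result.save("filled.png")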
diffusers/pipelines/allegro/__init__.py ADDED

@@ -0,0 +1,48 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_import_structure = {}
+
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_allegro"] = ["AllegroPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_allegro import AllegroPipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)