diffusers-0.30.2-py3-none-any.whl → diffusers-0.31.0-py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- diffusers/__init__.py +38 -2
- diffusers/configuration_utils.py +12 -0
- diffusers/dependency_versions_table.py +1 -1
- diffusers/image_processor.py +257 -54
- diffusers/loaders/__init__.py +2 -0
- diffusers/loaders/ip_adapter.py +5 -1
- diffusers/loaders/lora_base.py +14 -7
- diffusers/loaders/lora_conversion_utils.py +332 -0
- diffusers/loaders/lora_pipeline.py +707 -41
- diffusers/loaders/peft.py +1 -0
- diffusers/loaders/single_file_utils.py +81 -4
- diffusers/loaders/textual_inversion.py +2 -0
- diffusers/loaders/unet.py +39 -8
- diffusers/models/__init__.py +4 -0
- diffusers/models/adapter.py +53 -53
- diffusers/models/attention.py +86 -10
- diffusers/models/attention_processor.py +169 -133
- diffusers/models/autoencoders/autoencoder_kl.py +71 -11
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +287 -85
- diffusers/models/controlnet_flux.py +536 -0
- diffusers/models/controlnet_sd3.py +7 -3
- diffusers/models/controlnet_sparsectrl.py +0 -1
- diffusers/models/embeddings.py +238 -61
- diffusers/models/embeddings_flax.py +23 -9
- diffusers/models/model_loading_utils.py +182 -14
- diffusers/models/modeling_utils.py +283 -46
- diffusers/models/normalization.py +79 -0
- diffusers/models/transformers/__init__.py +1 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +1 -0
- diffusers/models/transformers/cogvideox_transformer_3d.py +58 -36
- diffusers/models/transformers/pixart_transformer_2d.py +9 -1
- diffusers/models/transformers/transformer_cogview3plus.py +386 -0
- diffusers/models/transformers/transformer_flux.py +161 -44
- diffusers/models/transformers/transformer_sd3.py +7 -1
- diffusers/models/unets/unet_2d_condition.py +8 -8
- diffusers/models/unets/unet_motion_model.py +41 -63
- diffusers/models/upsampling.py +6 -6
- diffusers/pipelines/__init__.py +40 -7
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +44 -20
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -66
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +1 -1
- diffusers/pipelines/auto_pipeline.py +39 -8
- diffusers/pipelines/cogvideo/__init__.py +6 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +32 -34
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +794 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +837 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +825 -0
- diffusers/pipelines/cogvideo/pipeline_output.py +20 -0
- diffusers/pipelines/cogview3/__init__.py +47 -0
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
- diffusers/pipelines/cogview3/pipeline_output.py +21 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +9 -1
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +8 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +8 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +36 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +9 -1
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +8 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +17 -3
- diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +3 -1
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
- diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
- diffusers/pipelines/flux/__init__.py +10 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -20
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +984 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +988 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1182 -0
- diffusers/pipelines/flux/pipeline_flux_img2img.py +850 -0
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +1015 -0
- diffusers/pipelines/free_noise_utils.py +365 -5
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +15 -3
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
- diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
- diffusers/pipelines/kolors/tokenizer.py +4 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
- diffusers/pipelines/latte/pipeline_latte.py +2 -2
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
- diffusers/pipelines/lumina/pipeline_lumina.py +2 -2
- diffusers/pipelines/pag/__init__.py +6 -0
- diffusers/pipelines/pag/pag_utils.py +8 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1544 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1685 -0
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +17 -5
- diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +12 -3
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1091 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
- diffusers/pipelines/pia/pipeline_pia.py +2 -0
- diffusers/pipelines/pipeline_loading_utils.py +225 -27
- diffusers/pipelines/pipeline_utils.py +123 -180
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +1 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +28 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +12 -3
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +20 -4
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +3 -3
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -14
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -14
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
- diffusers/quantizers/__init__.py +16 -0
- diffusers/quantizers/auto.py +126 -0
- diffusers/quantizers/base.py +233 -0
- diffusers/quantizers/bitsandbytes/__init__.py +2 -0
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +558 -0
- diffusers/quantizers/bitsandbytes/utils.py +306 -0
- diffusers/quantizers/quantization_config.py +391 -0
- diffusers/schedulers/scheduling_ddim.py +4 -1
- diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
- diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
- diffusers/schedulers/scheduling_ddpm.py +4 -1
- diffusers/schedulers/scheduling_ddpm_parallel.py +4 -1
- diffusers/schedulers/scheduling_deis_multistep.py +78 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +82 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +80 -1
- diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +82 -1
- diffusers/schedulers/scheduling_edm_euler.py +8 -6
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
- diffusers/schedulers/scheduling_euler_discrete.py +92 -7
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
- diffusers/schedulers/scheduling_heun_discrete.py +114 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
- diffusers/schedulers/scheduling_lms_discrete.py +76 -1
- diffusers/schedulers/scheduling_sasolver.py +78 -1
- diffusers/schedulers/scheduling_unclip.py +4 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +78 -1
- diffusers/training_utils.py +48 -18
- diffusers/utils/__init__.py +2 -1
- diffusers/utils/dummy_pt_objects.py +60 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +195 -0
- diffusers/utils/hub_utils.py +16 -4
- diffusers/utils/import_utils.py +31 -8
- diffusers/utils/loading_utils.py +28 -4
- diffusers/utils/peft_utils.py +3 -3
- diffusers/utils/testing_utils.py +59 -0
- {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/METADATA +7 -6
- {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/RECORD +173 -147
- {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/WHEEL +1 -1
- {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/LICENSE +0 -0
- {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/top_level.txt +0 -0
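Among the additions, the new `diffusers/quantizers` package (a bitsandbytes backend plus `quantization_config.py`) stands out. A minimal sketch of how the new quantized-loading path is typically used, assuming the `BitsAndBytesConfig` / `quantization_config` API mirrors the one in `transformers`; the checkpoint ID and dtype choices below are illustrative only:

```python
import torch
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel

# Assumed usage: quantize the transformer weights to 4-bit NF4 while loading.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
transformer = SD3Transformer2DModel.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",  # illustrative checkpoint
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)
```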
@@ -128,9 +128,21 @@ def get_resize_crop_region_for_grid(src, tgt_size):
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
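The two `std` lines kept as context show where the new docstring's description comes from. For reference, a standalone sketch of the rescaling it documents; the final blending step is reconstructed from the widely copied upstream helper and may differ in minor details:

```python
import torch

def rescale_noise_cfg(noise_cfg: torch.Tensor, noise_pred_text: torch.Tensor, guidance_rescale: float = 0.0):
    # Match the std of the CFG-combined prediction to the text-conditioned one
    # (Section 3.4 of arXiv:2305.08891), then blend by `guidance_rescale`.
    std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
    std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
    noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
    return guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
```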
@@ -893,8 +905,8 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
 
                 # perform guidance
                 if self.do_perturbed_attention_guidance:
-                    noise_pred = self._apply_perturbed_attention_guidance(
-                        noise_pred, self.do_classifier_free_guidance, self.guidance_scale, t
+                    noise_pred, noise_pred_text = self._apply_perturbed_attention_guidance(
+                        noise_pred, self.do_classifier_free_guidance, self.guidance_scale, t, True
                     )
                 elif self.do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
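The new trailing `True` and the tuple return indicate the helper can now also hand back the text-conditioned branch, which downstream steps such as `guidance_rescale` need. A rough sketch of what a CFG + PAG combination of this shape does; the function name, signature, and scales here are illustrative, not the library's internal API:

```python
import torch

def apply_cfg_and_pag(noise_pred: torch.Tensor, guidance_scale: float, pag_scale: float, return_pred_text: bool = False):
    # With both classifier-free and perturbed-attention guidance active, the batch
    # stacks three predictions: unconditional, text-conditioned, perturbed-attention.
    noise_uncond, noise_text, noise_perturb = noise_pred.chunk(3)
    combined = (
        noise_uncond
        + guidance_scale * (noise_text - noise_uncond)
        + pag_scale * (noise_text - noise_perturb)
    )
    return (combined, noise_text) if return_pred_text else combined
```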
@@ -75,7 +75,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
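The change itself is just a raw-string prefix on the docstring. For readers unfamiliar with the helper, this is roughly what it wraps when only `num_inference_steps` is passed; a minimal, self-contained sketch using a default-constructed scheduler:

```python
from diffusers import EulerDiscreteScheduler

scheduler = EulerDiscreteScheduler()
scheduler.set_timesteps(num_inference_steps=30)
timesteps = scheduler.timesteps  # descending tensor of 30 timesteps
```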
@@ -81,7 +81,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -60,9 +60,21 @@ EXAMPLE_DOC_STRING = """
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
@@ -82,7 +94,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -993,8 +1005,8 @@ class StableDiffusionPAGPipeline(
 
                 # perform guidance
                 if self.do_perturbed_attention_guidance:
-                    noise_pred = self._apply_perturbed_attention_guidance(
-                        noise_pred, self.do_classifier_free_guidance, self.guidance_scale, t
+                    noise_pred, noise_pred_text = self._apply_perturbed_attention_guidance(
+                        noise_pred, self.do_classifier_free_guidance, self.guidance_scale, t, True
                     )
 
                 elif self.do_classifier_free_guidance:
@@ -82,7 +82,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -212,6 +212,9 @@ class StableDiffusion3PAGPipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSin
             if hasattr(self, "transformer") and self.transformer is not None
             else 128
         )
+        self.patch_size = (
+            self.transformer.config.patch_size if hasattr(self, "transformer") and self.transformer is not None else 2
+        )
 
         self.set_pag_applied_layers(
             pag_applied_layers, pag_attn_processors=(PAGCFGJointAttnProcessor2_0(), PAGJointAttnProcessor2_0())
@@ -542,8 +545,14 @@ class StableDiffusion3PAGPipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSin
         callback_on_step_end_tensor_inputs=None,
         max_sequence_length=None,
     ):
-        if height % 8 != 0 or width % 8 != 0:
-            raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
+        if (
+            height % (self.vae_scale_factor * self.patch_size) != 0
+            or width % (self.vae_scale_factor * self.patch_size) != 0
+        ):
+            raise ValueError(
+                f"`height` and `width` have to be divisible by {self.vae_scale_factor * self.patch_size} but are {height} and {width}."
+                f"You can use height {height - height % (self.vae_scale_factor * self.patch_size)} and width {width - width % (self.vae_scale_factor * self.patch_size)}."
+            )
 
         if callback_on_step_end_tensor_inputs is not None and not all(
             k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
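Together with the `self.patch_size` attribute added in `__init__` above, the divisibility requirement moves from a hard-coded 8 to `vae_scale_factor * patch_size`. A small worked example of the arithmetic behind the new error message; the patch size of 2 is the default visible in the hunk above, while `vae_scale_factor = 8` is an assumed typical value:

```python
vae_scale_factor, patch_size = 8, 2       # 2 is the default shown above; 8 is assumed
multiple = vae_scale_factor * patch_size  # 16

height, width = 1000, 768
if height % multiple != 0 or width % multiple != 0:
    # The error message suggests the nearest smaller valid sizes:
    print(f"use height={height - height % multiple}, width={width - width % multiple}")
    # -> use height=992, width=768
```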
@@ -734,6 +734,8 @@ class AnimateDiffPAGPipeline(
         elif self.do_classifier_free_guidance:
             prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
 
+        prompt_embeds = prompt_embeds.repeat_interleave(repeats=num_frames, dim=0)
+
         if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
             ip_adapter_image_embeds = self.prepare_ip_adapter_image_embeds(
                 ip_adapter_image,
@@ -805,7 +807,9 @@
         with self.progress_bar(total=self._num_timesteps) as progress_bar:
             for i, t in enumerate(timesteps):
                 # expand the latents if we are doing classifier free guidance
-                latent_model_input = torch.cat([latents] * (prompt_embeds.shape[0] // latents.shape[0]))
+                latent_model_input = torch.cat(
+                    [latents] * (prompt_embeds.shape[0] // num_frames // latents.shape[0])
+                )
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
 
                 # predict the noise residual
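Both AnimateDiff PAG changes are shape bookkeeping: prompt embeddings are now repeated once per frame before the denoising loop, and the latent expansion divides that repetition back out to decide how many copies of `latents` to stack. A toy sketch of the arithmetic; all tensor sizes are made up for illustration, and the three-branch batch assumes CFG and PAG are both active:

```python
import torch

batch, num_frames, seq_len, dim = 1, 16, 77, 768
channels, h, w = 4, 64, 64

# CFG + PAG stack three prompt branches along the batch dimension.
prompt_embeds = torch.randn(3 * batch, seq_len, dim)
# New in 0.31.0: repeated per frame before the loop.
prompt_embeds = prompt_embeds.repeat_interleave(repeats=num_frames, dim=0)  # (48, 77, 768)

latents = torch.randn(batch, channels, num_frames, h, w)
# Divide the per-frame repetition back out to recover the number of guidance branches.
multiplier = prompt_embeds.shape[0] // num_frames // latents.shape[0]  # 3
latent_model_input = torch.cat([latents] * multiplier)
print(latent_model_input.shape)  # torch.Size([3, 4, 16, 64, 64])
```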