diffusers 0.30.3__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their public registry.
- diffusers/__init__.py +34 -2
- diffusers/configuration_utils.py +12 -0
- diffusers/dependency_versions_table.py +1 -1
- diffusers/image_processor.py +257 -54
- diffusers/loaders/__init__.py +2 -0
- diffusers/loaders/ip_adapter.py +5 -1
- diffusers/loaders/lora_base.py +14 -7
- diffusers/loaders/lora_conversion_utils.py +332 -0
- diffusers/loaders/lora_pipeline.py +707 -41
- diffusers/loaders/peft.py +1 -0
- diffusers/loaders/single_file_utils.py +81 -4
- diffusers/loaders/textual_inversion.py +2 -0
- diffusers/loaders/unet.py +39 -8
- diffusers/models/__init__.py +4 -0
- diffusers/models/adapter.py +53 -53
- diffusers/models/attention.py +86 -10
- diffusers/models/attention_processor.py +169 -133
- diffusers/models/autoencoders/autoencoder_kl.py +71 -11
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +187 -88
- diffusers/models/controlnet_flux.py +536 -0
- diffusers/models/controlnet_sd3.py +7 -3
- diffusers/models/controlnet_sparsectrl.py +0 -1
- diffusers/models/embeddings.py +170 -61
- diffusers/models/embeddings_flax.py +23 -9
- diffusers/models/model_loading_utils.py +182 -14
- diffusers/models/modeling_utils.py +283 -46
- diffusers/models/normalization.py +79 -0
- diffusers/models/transformers/__init__.py +1 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +1 -0
- diffusers/models/transformers/cogvideox_transformer_3d.py +23 -2
- diffusers/models/transformers/pixart_transformer_2d.py +9 -1
- diffusers/models/transformers/transformer_cogview3plus.py +386 -0
- diffusers/models/transformers/transformer_flux.py +161 -44
- diffusers/models/transformers/transformer_sd3.py +7 -1
- diffusers/models/unets/unet_2d_condition.py +8 -8
- diffusers/models/unets/unet_motion_model.py +41 -63
- diffusers/models/upsampling.py +6 -6
- diffusers/pipelines/__init__.py +35 -6
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +44 -20
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -66
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +1 -1
- diffusers/pipelines/auto_pipeline.py +39 -8
- diffusers/pipelines/cogvideo/__init__.py +2 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +30 -17
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +794 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +41 -31
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +42 -29
- diffusers/pipelines/cogview3/__init__.py +47 -0
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
- diffusers/pipelines/cogview3/pipeline_output.py +21 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +9 -1
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +8 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +8 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +36 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +9 -1
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +8 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +17 -3
- diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +3 -1
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
- diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
- diffusers/pipelines/flux/__init__.py +10 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -20
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +984 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +988 -0
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1182 -0
- diffusers/pipelines/flux/pipeline_flux_img2img.py +850 -0
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +1015 -0
- diffusers/pipelines/free_noise_utils.py +365 -5
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +15 -3
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
- diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
- diffusers/pipelines/kolors/tokenizer.py +4 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
- diffusers/pipelines/latte/pipeline_latte.py +2 -2
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
- diffusers/pipelines/lumina/pipeline_lumina.py +2 -2
- diffusers/pipelines/pag/__init__.py +6 -0
- diffusers/pipelines/pag/pag_utils.py +8 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1544 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1685 -0
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +17 -5
- diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +12 -3
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1091 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
- diffusers/pipelines/pia/pipeline_pia.py +2 -0
- diffusers/pipelines/pipeline_loading_utils.py +225 -27
- diffusers/pipelines/pipeline_utils.py +123 -180
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +1 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +28 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +12 -3
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +20 -4
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +3 -3
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -14
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -14
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
- diffusers/quantizers/__init__.py +16 -0
- diffusers/quantizers/auto.py +126 -0
- diffusers/quantizers/base.py +233 -0
- diffusers/quantizers/bitsandbytes/__init__.py +2 -0
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +558 -0
- diffusers/quantizers/bitsandbytes/utils.py +306 -0
- diffusers/quantizers/quantization_config.py +391 -0
- diffusers/schedulers/scheduling_ddim.py +4 -1
- diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
- diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
- diffusers/schedulers/scheduling_ddpm.py +4 -1
- diffusers/schedulers/scheduling_ddpm_parallel.py +4 -1
- diffusers/schedulers/scheduling_deis_multistep.py +78 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +82 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +80 -1
- diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +82 -1
- diffusers/schedulers/scheduling_edm_euler.py +8 -6
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
- diffusers/schedulers/scheduling_euler_discrete.py +92 -7
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
- diffusers/schedulers/scheduling_heun_discrete.py +114 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
- diffusers/schedulers/scheduling_lms_discrete.py +76 -1
- diffusers/schedulers/scheduling_sasolver.py +78 -1
- diffusers/schedulers/scheduling_unclip.py +4 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +78 -1
- diffusers/training_utils.py +48 -18
- diffusers/utils/__init__.py +2 -1
- diffusers/utils/dummy_pt_objects.py +60 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +165 -0
- diffusers/utils/hub_utils.py +16 -4
- diffusers/utils/import_utils.py +31 -8
- diffusers/utils/loading_utils.py +28 -4
- diffusers/utils/peft_utils.py +3 -3
- diffusers/utils/testing_utils.py +59 -0
- {diffusers-0.30.3.dist-info → diffusers-0.31.0.dist-info}/METADATA +7 -6
- {diffusers-0.30.3.dist-info → diffusers-0.31.0.dist-info}/RECORD +172 -149
- {diffusers-0.30.3.dist-info → diffusers-0.31.0.dist-info}/LICENSE +0 -0
- {diffusers-0.30.3.dist-info → diffusers-0.31.0.dist-info}/WHEEL +0 -0
- {diffusers-0.30.3.dist-info → diffusers-0.31.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.30.3.dist-info → diffusers-0.31.0.dist-info}/top_level.txt +0 -0
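The headline addition visible in this file list is the new `diffusers/quantizers` package, which wires bitsandbytes-backed quantization into model loading. A minimal sketch of the new API, assuming `bitsandbytes` and a CUDA GPU are available (the FLUX.1-dev checkpoint is illustrative and gated):

```python
import torch
from diffusers import BitsAndBytesConfig, FluxTransformer2DModel

# 4-bit NF4 loading via the quantization_config argument introduced in 0.31.0.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
transformer = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)
```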
diffusers/pipelines/__init__.py
CHANGED
@@ -123,8 +123,16 @@ else:
         "AnimateDiffSDXLPipeline",
         "AnimateDiffSparseControlNetPipeline",
         "AnimateDiffVideoToVideoPipeline",
+        "AnimateDiffVideoToVideoControlNetPipeline",
+    ]
+    _import_structure["flux"] = [
+        "FluxControlNetPipeline",
+        "FluxControlNetImg2ImgPipeline",
+        "FluxControlNetInpaintPipeline",
+        "FluxImg2ImgPipeline",
+        "FluxInpaintPipeline",
+        "FluxPipeline",
     ]
-    _import_structure["flux"] = ["FluxPipeline"]
     _import_structure["audioldm"] = ["AudioLDMPipeline"]
     _import_structure["audioldm2"] = [
         "AudioLDM2Pipeline",
@@ -136,7 +144,9 @@ else:
         "CogVideoXPipeline",
         "CogVideoXImageToVideoPipeline",
         "CogVideoXVideoToVideoPipeline",
+        "CogVideoXFunControlPipeline",
     ]
+    _import_structure["cogview3"] = ["CogView3PlusPipeline"]
     _import_structure["controlnet"].extend(
         [
             "BlipDiffusionControlNetPipeline",
@@ -150,14 +160,17 @@ else:
     )
     _import_structure["pag"].extend(
         [
+            "StableDiffusionControlNetPAGInpaintPipeline",
             "AnimateDiffPAGPipeline",
             "KolorsPAGPipeline",
             "HunyuanDiTPAGPipeline",
             "StableDiffusion3PAGPipeline",
             "StableDiffusionPAGPipeline",
+            "StableDiffusionPAGImg2ImgPipeline",
             "StableDiffusionControlNetPAGPipeline",
             "StableDiffusionXLPAGPipeline",
             "StableDiffusionXLPAGInpaintPipeline",
+            "StableDiffusionXLControlNetPAGImg2ImgPipeline",
             "StableDiffusionXLControlNetPAGPipeline",
             "StableDiffusionXLPAGImg2ImgPipeline",
             "PixArtSigmaPAGPipeline",
@@ -177,6 +190,7 @@ else:
     _import_structure["controlnet_sd3"].extend(
         [
             "StableDiffusion3ControlNetPipeline",
+            "StableDiffusion3ControlNetInpaintingPipeline",
         ]
     )
     _import_structure["deepfloyd_if"] = [
@@ -446,6 +460,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             AnimateDiffPipeline,
             AnimateDiffSDXLPipeline,
             AnimateDiffSparseControlNetPipeline,
+            AnimateDiffVideoToVideoControlNetPipeline,
             AnimateDiffVideoToVideoPipeline,
         )
         from .audioldm import AudioLDMPipeline
@@ -456,7 +471,13 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         )
        from .aura_flow import AuraFlowPipeline
         from .blip_diffusion import BlipDiffusionPipeline
-        from .cogvideo import CogVideoXImageToVideoPipeline, CogVideoXPipeline, CogVideoXVideoToVideoPipeline
+        from .cogvideo import (
+            CogVideoXFunControlPipeline,
+            CogVideoXImageToVideoPipeline,
+            CogVideoXPipeline,
+            CogVideoXVideoToVideoPipeline,
+        )
+        from .cogview3 import CogView3PlusPipeline
         from .controlnet import (
             BlipDiffusionControlNetPipeline,
             StableDiffusionControlNetImg2ImgPipeline,
@@ -469,9 +490,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .controlnet_hunyuandit import (
             HunyuanDiTControlNetPipeline,
         )
-        from .controlnet_sd3 import (
-            StableDiffusion3ControlNetPipeline,
-        )
+        from .controlnet_sd3 import StableDiffusion3ControlNetInpaintingPipeline, StableDiffusion3ControlNetPipeline
         from .controlnet_xs import (
             StableDiffusionControlNetXSPipeline,
             StableDiffusionXLControlNetXSPipeline,
@@ -498,7 +517,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             VersatileDiffusionTextToImagePipeline,
             VQDiffusionPipeline,
         )
-        from .flux import FluxPipeline
+        from .flux import (
+            FluxControlNetImg2ImgPipeline,
+            FluxControlNetInpaintPipeline,
+            FluxControlNetPipeline,
+            FluxImg2ImgPipeline,
+            FluxInpaintPipeline,
+            FluxPipeline,
+        )
         from .hunyuandit import HunyuanDiTPipeline
         from .i2vgen_xl import I2VGenXLPipeline
         from .kandinsky import (
@@ -550,8 +576,11 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             KolorsPAGPipeline,
             PixArtSigmaPAGPipeline,
             StableDiffusion3PAGPipeline,
+            StableDiffusionControlNetPAGInpaintPipeline,
             StableDiffusionControlNetPAGPipeline,
+            StableDiffusionPAGImg2ImgPipeline,
             StableDiffusionPAGPipeline,
+            StableDiffusionXLControlNetPAGImg2ImgPipeline,
             StableDiffusionXLControlNetPAGPipeline,
             StableDiffusionXLPAGImg2ImgPipeline,
             StableDiffusionXLPAGInpaintPipeline,
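Every class registered above becomes importable from the package root once 0.31.0 is installed; nothing model-sized loads at import time thanks to the lazy `_import_structure` mechanism. A quick smoke test of the new names:

```python
from diffusers import (
    AnimateDiffVideoToVideoControlNetPipeline,
    CogVideoXFunControlPipeline,
    CogView3PlusPipeline,
    FluxControlNetInpaintPipeline,
    FluxControlNetPipeline,
    FluxImg2ImgPipeline,
    FluxInpaintPipeline,
    StableDiffusion3ControlNetInpaintingPipeline,
    StableDiffusionPAGImg2ImgPipeline,
)
```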
diffusers/pipelines/animatediff/__init__.py
CHANGED
@@ -26,6 +26,7 @@ else:
     _import_structure["pipeline_animatediff_sdxl"] = ["AnimateDiffSDXLPipeline"]
     _import_structure["pipeline_animatediff_sparsectrl"] = ["AnimateDiffSparseControlNetPipeline"]
     _import_structure["pipeline_animatediff_video2video"] = ["AnimateDiffVideoToVideoPipeline"]
+    _import_structure["pipeline_animatediff_video2video_controlnet"] = ["AnimateDiffVideoToVideoControlNetPipeline"]
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     try:
@@ -40,6 +41,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .pipeline_animatediff_sdxl import AnimateDiffSDXLPipeline
         from .pipeline_animatediff_sparsectrl import AnimateDiffSparseControlNetPipeline
         from .pipeline_animatediff_video2video import AnimateDiffVideoToVideoPipeline
+        from .pipeline_animatediff_video2video_controlnet import AnimateDiffVideoToVideoControlNetPipeline
         from .pipeline_output import AnimateDiffPipelineOutput
 
 else:
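For context, these `_import_structure` entries feed the lazy-import shim that diffusers installs in place of each subpackage, which is why registering a new pipeline only takes the two lines above. A simplified sketch of that pattern (the real implementation lives in `diffusers.utils` and handles more cases):

```python
import importlib
from types import ModuleType

class LazyModuleSketch(ModuleType):
    """Simplified stand-in for the lazy-import shim diffusers uses."""

    def __init__(self, name: str, import_structure: dict):
        super().__init__(name)
        # Invert {"submodule": ["ClassA", ...]} into {"ClassA": "submodule"}.
        self._class_to_module = {
            cls: mod for mod, classes in import_structure.items() for cls in classes
        }

    def __getattr__(self, attr: str):
        module_name = self._class_to_module.get(attr)
        if module_name is None:
            raise AttributeError(f"module {self.__name__!r} has no attribute {attr!r}")
        # Import the submodule only when one of its names is first accessed.
        module = importlib.import_module(f"{self.__name__}.{module_name}")
        return getattr(module, attr)
```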
diffusers/pipelines/animatediff/pipeline_animatediff.py
CHANGED
@@ -432,7 +432,6 @@ class AnimateDiffPipeline(
             extra_step_kwargs["generator"] = generator
         return extra_step_kwargs
 
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.check_inputs
     def check_inputs(
         self,
         prompt,
@@ -470,8 +469,8 @@ class AnimateDiffPipeline(
             raise ValueError(
                 "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
             )
-        elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
-            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
+        elif prompt is not None and not isinstance(prompt, (str, list, dict)):
+            raise ValueError(f"`prompt` has to be of type `str`, `list` or `dict` but is {type(prompt)=}")
 
         if negative_prompt is not None and negative_prompt_embeds is not None:
             raise ValueError(
@@ -557,11 +556,15 @@ class AnimateDiffPipeline(
     def num_timesteps(self):
         return self._num_timesteps
 
+    @property
+    def interrupt(self):
+        return self._interrupt
+
     @torch.no_grad()
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
         self,
-        prompt: Union[str, List[str]] = None,
+        prompt: Optional[Union[str, List[str]]] = None,
         num_frames: Optional[int] = 16,
         height: Optional[int] = None,
         width: Optional[int] = None,
@@ -701,9 +704,10 @@ class AnimateDiffPipeline(
         self._guidance_scale = guidance_scale
         self._clip_skip = clip_skip
         self._cross_attention_kwargs = cross_attention_kwargs
+        self._interrupt = False
 
         # 2. Define call parameters
-        if prompt is not None and isinstance(prompt, str):
+        if prompt is not None and isinstance(prompt, (str, dict)):
             batch_size = 1
         elif prompt is not None and isinstance(prompt, list):
             batch_size = len(prompt)
@@ -716,22 +720,39 @@ class AnimateDiffPipeline(
         text_encoder_lora_scale = (
             self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None
         )
-        prompt_embeds, negative_prompt_embeds = self.encode_prompt(
-            prompt,
-            device,
-            num_videos_per_prompt,
-            self.do_classifier_free_guidance,
-            negative_prompt,
-            prompt_embeds=prompt_embeds,
-            negative_prompt_embeds=negative_prompt_embeds,
-            lora_scale=text_encoder_lora_scale,
-            clip_skip=self.clip_skip,
-        )
-        # For classifier free guidance, we need to do two forward passes.
-        # Here we concatenate the unconditional and text embeddings into a single batch
-        # to avoid doing two forward passes
-        if self.do_classifier_free_guidance:
-            prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
+        if self.free_noise_enabled:
+            prompt_embeds, negative_prompt_embeds = self._encode_prompt_free_noise(
+                prompt=prompt,
+                num_frames=num_frames,
+                device=device,
+                num_videos_per_prompt=num_videos_per_prompt,
+                do_classifier_free_guidance=self.do_classifier_free_guidance,
+                negative_prompt=negative_prompt,
+                prompt_embeds=prompt_embeds,
+                negative_prompt_embeds=negative_prompt_embeds,
+                lora_scale=text_encoder_lora_scale,
+                clip_skip=self.clip_skip,
+            )
+        else:
+            prompt_embeds, negative_prompt_embeds = self.encode_prompt(
+                prompt,
+                device,
+                num_videos_per_prompt,
+                self.do_classifier_free_guidance,
+                negative_prompt,
+                prompt_embeds=prompt_embeds,
+                negative_prompt_embeds=negative_prompt_embeds,
+                lora_scale=text_encoder_lora_scale,
+                clip_skip=self.clip_skip,
+            )
+
+            # For classifier free guidance, we need to do two forward passes.
+            # Here we concatenate the unconditional and text embeddings into a single batch
+            # to avoid doing two forward passes
+            if self.do_classifier_free_guidance:
+                prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
+
+        prompt_embeds = prompt_embeds.repeat_interleave(repeats=num_frames, dim=0)
 
         if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
             image_embeds = self.prepare_ip_adapter_image_embeds(
@@ -783,6 +804,9 @@ class AnimateDiffPipeline(
         # 8. Denoising loop
         with self.progress_bar(total=self._num_timesteps) as progress_bar:
             for i, t in enumerate(timesteps):
+                if self.interrupt:
+                    continue
+
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
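The new `interrupt` property and the `if self.interrupt: continue` guard let a step-end callback abort generation cleanly: the remaining steps become no-ops and the pipeline still decodes whatever latents it has. A minimal sketch, assuming a CUDA device, checkpoints taken from the AnimateDiff docs, and an arbitrary stopping rule:

```python
import torch
from diffusers import AnimateDiffPipeline, MotionAdapter

adapter = MotionAdapter.from_pretrained(
    "guoyww/animatediff-motion-adapter-v1-5-2", torch_dtype=torch.float16
)
pipe = AnimateDiffPipeline.from_pretrained(
    "emilianJR/epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16
).to("cuda")

def stop_after_ten_steps(pipeline, step, timestep, callback_kwargs):
    # Setting the private flag flips the public `interrupt` property,
    # which the denoising loop checks at the top of every iteration.
    if step >= 9:
        pipeline._interrupt = True
    return callback_kwargs

frames = pipe(
    prompt="a rocket lifting off, cinematic lighting",
    num_inference_steps=25,
    callback_on_step_end=stop_after_ten_steps,
).frames[0]
```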
diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py
CHANGED
@@ -505,8 +505,8 @@ class AnimateDiffControlNetPipeline(
             raise ValueError(
                 "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
             )
-        elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
-            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
+        elif prompt is not None and not isinstance(prompt, (str, list, dict)):
+            raise ValueError(f"`prompt` has to be of type `str`, `list` or `dict` but is {type(prompt)}")
 
         if negative_prompt is not None and negative_prompt_embeds is not None:
             raise ValueError(
@@ -699,6 +699,10 @@ class AnimateDiffControlNetPipeline(
     def num_timesteps(self):
         return self._num_timesteps
 
+    @property
+    def interrupt(self):
+        return self._interrupt
+
     @torch.no_grad()
     def __call__(
         self,
@@ -858,9 +862,10 @@ class AnimateDiffControlNetPipeline(
         self._guidance_scale = guidance_scale
         self._clip_skip = clip_skip
         self._cross_attention_kwargs = cross_attention_kwargs
+        self._interrupt = False
 
         # 2. Define call parameters
-        if prompt is not None and isinstance(prompt, str):
+        if prompt is not None and isinstance(prompt, (str, dict)):
             batch_size = 1
         elif prompt is not None and isinstance(prompt, list):
             batch_size = len(prompt)
@@ -883,22 +888,39 @@ class AnimateDiffControlNetPipeline(
         text_encoder_lora_scale = (
             cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None
         )
-        prompt_embeds, negative_prompt_embeds = self.encode_prompt(
-            prompt,
-            device,
-            num_videos_per_prompt,
-            self.do_classifier_free_guidance,
-            negative_prompt,
-            prompt_embeds=prompt_embeds,
-            negative_prompt_embeds=negative_prompt_embeds,
-            lora_scale=text_encoder_lora_scale,
-            clip_skip=self.clip_skip,
-        )
-        # For classifier free guidance, we need to do two forward passes.
-        # Here we concatenate the unconditional and text embeddings into a single batch
-        # to avoid doing two forward passes
-        if self.do_classifier_free_guidance:
-            prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
+        if self.free_noise_enabled:
+            prompt_embeds, negative_prompt_embeds = self._encode_prompt_free_noise(
+                prompt=prompt,
+                num_frames=num_frames,
+                device=device,
+                num_videos_per_prompt=num_videos_per_prompt,
+                do_classifier_free_guidance=self.do_classifier_free_guidance,
+                negative_prompt=negative_prompt,
+                prompt_embeds=prompt_embeds,
+                negative_prompt_embeds=negative_prompt_embeds,
+                lora_scale=text_encoder_lora_scale,
+                clip_skip=self.clip_skip,
+            )
+        else:
+            prompt_embeds, negative_prompt_embeds = self.encode_prompt(
+                prompt,
+                device,
+                num_videos_per_prompt,
+                self.do_classifier_free_guidance,
+                negative_prompt,
+                prompt_embeds=prompt_embeds,
+                negative_prompt_embeds=negative_prompt_embeds,
+                lora_scale=text_encoder_lora_scale,
+                clip_skip=self.clip_skip,
+            )
+
+            # For classifier free guidance, we need to do two forward passes.
+            # Here we concatenate the unconditional and text embeddings into a single batch
+            # to avoid doing two forward passes
+            if self.do_classifier_free_guidance:
+                prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
+
+        prompt_embeds = prompt_embeds.repeat_interleave(repeats=num_frames, dim=0)
 
         if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
             image_embeds = self.prepare_ip_adapter_image_embeds(
@@ -990,6 +1012,9 @@ class AnimateDiffControlNetPipeline(
         # 8. Denoising loop
         with self.progress_bar(total=self._num_timesteps) as progress_bar:
             for i, t in enumerate(timesteps):
+                if self.interrupt:
+                    continue
+
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
@@ -1002,7 +1027,6 @@ class AnimateDiffControlNetPipeline(
                 else:
                     control_model_input = latent_model_input
                     controlnet_prompt_embeds = prompt_embeds
-                    controlnet_prompt_embeds = controlnet_prompt_embeds.repeat_interleave(num_frames, dim=0)
 
                 if isinstance(controlnet_keep[i], list):
                     cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
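The `(str, dict)` prompt checks and the `_encode_prompt_free_noise` branch above exist to support FreeNoise prompt interpolation, where a dict maps frame indices to prompts. A sketch reusing the `pipe` from the previous example (the ControlNet variant works the same way, modulo its conditioning frames):

```python
pipe.enable_free_noise(context_length=16, context_stride=4)

# With FreeNoise enabled, `prompt` may map frame indices to prompts;
# embeddings are interpolated between the keyed frames.
video = pipe(
    prompt={
        0: "a snowy mountain at dawn",
        64: "the same mountain at golden hour, warm light",
    },
    negative_prompt="bad quality, worst quality",
    num_frames=128,
    num_inference_steps=25,
).frames[0]
```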
diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py
CHANGED
@@ -113,9 +113,21 @@ EXAMPLE_DOC_STRING = """
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
@@ -135,7 +147,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -1143,6 +1155,8 @@ class AnimateDiffSDXLPipeline(
             add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
             add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
 
+        prompt_embeds = prompt_embeds.repeat_interleave(repeats=num_frames, dim=0)
+
         prompt_embeds = prompt_embeds.to(device)
         add_text_embeds = add_text_embeds.to(device)
         add_time_ids = add_time_ids.to(device).repeat(batch_size * num_videos_per_prompt, 1)
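Only the docstring of `rescale_noise_cfg` changed here; the body (its first two lines appear as context above) implements the Section 3.4 fix as a per-sample standard-deviation match followed by a blend. A standalone form for reference:

```python
import torch

def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
    # Match the per-sample std of the guided prediction to that of the
    # text-only prediction, then blend by guidance_rescale:
    #   out = phi * (cfg * std_text / std_cfg) + (1 - phi) * cfg
    std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
    std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
    noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
    return guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
```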
diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py
CHANGED
@@ -878,6 +878,8 @@ class AnimateDiffSparseControlNetPipeline(
         if self.do_classifier_free_guidance:
             prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
 
+        prompt_embeds = prompt_embeds.repeat_interleave(repeats=num_frames, dim=0)
+
         # 4. Prepare IP-Adapter embeddings
         if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
             image_embeds = self.prepare_ip_adapter_image_embeds(