diffusers 0.30.2__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. diffusers/__init__.py +38 -2
  2. diffusers/configuration_utils.py +12 -0
  3. diffusers/dependency_versions_table.py +1 -1
  4. diffusers/image_processor.py +257 -54
  5. diffusers/loaders/__init__.py +2 -0
  6. diffusers/loaders/ip_adapter.py +5 -1
  7. diffusers/loaders/lora_base.py +14 -7
  8. diffusers/loaders/lora_conversion_utils.py +332 -0
  9. diffusers/loaders/lora_pipeline.py +707 -41
  10. diffusers/loaders/peft.py +1 -0
  11. diffusers/loaders/single_file_utils.py +81 -4
  12. diffusers/loaders/textual_inversion.py +2 -0
  13. diffusers/loaders/unet.py +39 -8
  14. diffusers/models/__init__.py +4 -0
  15. diffusers/models/adapter.py +53 -53
  16. diffusers/models/attention.py +86 -10
  17. diffusers/models/attention_processor.py +169 -133
  18. diffusers/models/autoencoders/autoencoder_kl.py +71 -11
  19. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +287 -85
  20. diffusers/models/controlnet_flux.py +536 -0
  21. diffusers/models/controlnet_sd3.py +7 -3
  22. diffusers/models/controlnet_sparsectrl.py +0 -1
  23. diffusers/models/embeddings.py +238 -61
  24. diffusers/models/embeddings_flax.py +23 -9
  25. diffusers/models/model_loading_utils.py +182 -14
  26. diffusers/models/modeling_utils.py +283 -46
  27. diffusers/models/normalization.py +79 -0
  28. diffusers/models/transformers/__init__.py +1 -0
  29. diffusers/models/transformers/auraflow_transformer_2d.py +1 -0
  30. diffusers/models/transformers/cogvideox_transformer_3d.py +58 -36
  31. diffusers/models/transformers/pixart_transformer_2d.py +9 -1
  32. diffusers/models/transformers/transformer_cogview3plus.py +386 -0
  33. diffusers/models/transformers/transformer_flux.py +161 -44
  34. diffusers/models/transformers/transformer_sd3.py +7 -1
  35. diffusers/models/unets/unet_2d_condition.py +8 -8
  36. diffusers/models/unets/unet_motion_model.py +41 -63
  37. diffusers/models/upsampling.py +6 -6
  38. diffusers/pipelines/__init__.py +40 -7
  39. diffusers/pipelines/animatediff/__init__.py +2 -0
  40. diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
  41. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +44 -20
  42. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
  43. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +2 -0
  44. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -66
  45. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
  46. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +1 -1
  47. diffusers/pipelines/auto_pipeline.py +39 -8
  48. diffusers/pipelines/cogvideo/__init__.py +6 -0
  49. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +32 -34
  50. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +794 -0
  51. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +837 -0
  52. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +825 -0
  53. diffusers/pipelines/cogvideo/pipeline_output.py +20 -0
  54. diffusers/pipelines/cogview3/__init__.py +47 -0
  55. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
  56. diffusers/pipelines/cogview3/pipeline_output.py +21 -0
  57. diffusers/pipelines/controlnet/pipeline_controlnet.py +9 -1
  58. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +8 -0
  59. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +8 -0
  60. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +36 -13
  61. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +9 -1
  62. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +8 -1
  63. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +17 -3
  64. diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
  65. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +3 -1
  66. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
  67. diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
  68. diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
  69. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
  70. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
  71. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
  72. diffusers/pipelines/flux/__init__.py +10 -0
  73. diffusers/pipelines/flux/pipeline_flux.py +53 -20
  74. diffusers/pipelines/flux/pipeline_flux_controlnet.py +984 -0
  75. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +988 -0
  76. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1182 -0
  77. diffusers/pipelines/flux/pipeline_flux_img2img.py +850 -0
  78. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1015 -0
  79. diffusers/pipelines/free_noise_utils.py +365 -5
  80. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +15 -3
  81. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
  82. diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
  83. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
  84. diffusers/pipelines/kolors/tokenizer.py +4 -0
  85. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
  86. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
  87. diffusers/pipelines/latte/pipeline_latte.py +2 -2
  88. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
  89. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
  90. diffusers/pipelines/lumina/pipeline_lumina.py +2 -2
  91. diffusers/pipelines/pag/__init__.py +6 -0
  92. diffusers/pipelines/pag/pag_utils.py +8 -2
  93. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1 -1
  94. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1544 -0
  95. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +2 -2
  96. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1685 -0
  97. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +17 -5
  98. diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
  99. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +1 -1
  100. diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
  101. diffusers/pipelines/pag/pipeline_pag_sd_3.py +12 -3
  102. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
  103. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1091 -0
  104. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
  105. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
  106. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
  107. diffusers/pipelines/pia/pipeline_pia.py +2 -0
  108. diffusers/pipelines/pipeline_loading_utils.py +225 -27
  109. diffusers/pipelines/pipeline_utils.py +123 -180
  110. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +1 -1
  111. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +1 -1
  112. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
  113. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
  114. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +28 -6
  115. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
  116. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
  117. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
  118. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +12 -3
  119. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +20 -4
  120. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +3 -3
  121. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
  122. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
  123. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
  124. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -4
  125. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -14
  126. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -14
  127. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
  128. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
  129. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
  130. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
  131. diffusers/quantizers/__init__.py +16 -0
  132. diffusers/quantizers/auto.py +126 -0
  133. diffusers/quantizers/base.py +233 -0
  134. diffusers/quantizers/bitsandbytes/__init__.py +2 -0
  135. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +558 -0
  136. diffusers/quantizers/bitsandbytes/utils.py +306 -0
  137. diffusers/quantizers/quantization_config.py +391 -0
  138. diffusers/schedulers/scheduling_ddim.py +4 -1
  139. diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
  140. diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
  141. diffusers/schedulers/scheduling_ddpm.py +4 -1
  142. diffusers/schedulers/scheduling_ddpm_parallel.py +4 -1
  143. diffusers/schedulers/scheduling_deis_multistep.py +78 -1
  144. diffusers/schedulers/scheduling_dpmsolver_multistep.py +82 -1
  145. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +80 -1
  146. diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
  147. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +82 -1
  148. diffusers/schedulers/scheduling_edm_euler.py +8 -6
  149. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
  150. diffusers/schedulers/scheduling_euler_discrete.py +92 -7
  151. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
  152. diffusers/schedulers/scheduling_heun_discrete.py +114 -8
  153. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
  154. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
  155. diffusers/schedulers/scheduling_lms_discrete.py +76 -1
  156. diffusers/schedulers/scheduling_sasolver.py +78 -1
  157. diffusers/schedulers/scheduling_unclip.py +4 -1
  158. diffusers/schedulers/scheduling_unipc_multistep.py +78 -1
  159. diffusers/training_utils.py +48 -18
  160. diffusers/utils/__init__.py +2 -1
  161. diffusers/utils/dummy_pt_objects.py +60 -0
  162. diffusers/utils/dummy_torch_and_transformers_objects.py +195 -0
  163. diffusers/utils/hub_utils.py +16 -4
  164. diffusers/utils/import_utils.py +31 -8
  165. diffusers/utils/loading_utils.py +28 -4
  166. diffusers/utils/peft_utils.py +3 -3
  167. diffusers/utils/testing_utils.py +59 -0
  168. {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/METADATA +7 -6
  169. {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/RECORD +173 -147
  170. {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/WHEEL +1 -1
  171. {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/LICENSE +0 -0
  172. {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/entry_points.txt +0 -0
  173. {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/top_level.txt +0 -0
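
The largest addition in this release is the new diffusers/quantizers package (items 131–137), which introduces bitsandbytes-backed 4-bit/8-bit weight loading for models. A minimal usage sketch, assuming bitsandbytes is installed; the model class and repository id are illustrative placeholders, not part of this diff:

import torch
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel

# 4-bit NF4 quantization config backed by bitsandbytes (new in 0.31.0).
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# `quantization_config` is a new argument on model-level `from_pretrained`.
transformer = SD3Transformer2DModel.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",  # illustrative repo id
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

The quantized module can then be passed to a pipeline constructor in place of the full-precision transformer.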
diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py

@@ -119,7 +119,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

@@ -246,7 +246,6 @@ class AnimateDiffVideoToVideoPipeline(
         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor)

-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt with num_images_per_prompt -> num_videos_per_prompt
     def encode_prompt(
         self,
         prompt,
@@ -299,7 +298,7 @@ class AnimateDiffVideoToVideoPipeline(
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)

-        if prompt is not None and isinstance(prompt, str):
+        if prompt is not None and isinstance(prompt, (str, dict)):
             batch_size = 1
         elif prompt is not None and isinstance(prompt, list):
             batch_size = len(prompt)
@@ -582,8 +581,8 @@ class AnimateDiffVideoToVideoPipeline(
             raise ValueError(
                 "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
             )
-        elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
-            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
+        elif prompt is not None and not isinstance(prompt, (str, list, dict)):
+            raise ValueError(f"`prompt` has to be of type `str`, `list` or `dict` but is {type(prompt)}")

         if negative_prompt is not None and negative_prompt_embeds is not None:
             raise ValueError(
@@ -628,23 +627,20 @@ class AnimateDiffVideoToVideoPipeline(

     def prepare_latents(
         self,
-        video,
-        height,
-        width,
-        num_channels_latents,
-        batch_size,
-        timestep,
-        dtype,
-        device,
-        generator,
-        latents=None,
+        video: Optional[torch.Tensor] = None,
+        height: int = 64,
+        width: int = 64,
+        num_channels_latents: int = 4,
+        batch_size: int = 1,
+        timestep: Optional[int] = None,
+        dtype: Optional[torch.dtype] = None,
+        device: Optional[torch.device] = None,
+        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+        latents: Optional[torch.Tensor] = None,
         decode_chunk_size: int = 16,
-    ):
-        if latents is None:
-            num_frames = video.shape[1]
-        else:
-            num_frames = latents.shape[2]
-
+        add_noise: bool = False,
+    ) -> torch.Tensor:
+        num_frames = video.shape[1] if latents is None else latents.shape[2]
         shape = (
             batch_size,
             num_channels_latents,
@@ -708,8 +704,13 @@ class AnimateDiffVideoToVideoPipeline(
             if shape != latents.shape:
                 # [B, C, F, H, W]
                 raise ValueError(f"`latents` expected to have {shape=}, but found {latents.shape=}")
+
             latents = latents.to(device, dtype=dtype)

+        if add_noise:
+            noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
+            latents = self.scheduler.add_noise(latents, noise, timestep)
+
         return latents

     @property
@@ -735,6 +736,10 @@ class AnimateDiffVideoToVideoPipeline(
     def num_timesteps(self):
         return self._num_timesteps

+    @property
+    def interrupt(self):
+        return self._interrupt
+
     @torch.no_grad()
     def __call__(
         self,
@@ -743,6 +748,7 @@ class AnimateDiffVideoToVideoPipeline(
         height: Optional[int] = None,
         width: Optional[int] = None,
         num_inference_steps: int = 50,
+        enforce_inference_steps: bool = False,
         timesteps: Optional[List[int]] = None,
         sigmas: Optional[List[float]] = None,
         guidance_scale: float = 7.5,
@@ -874,9 +880,10 @@ class AnimateDiffVideoToVideoPipeline(
         self._guidance_scale = guidance_scale
         self._clip_skip = clip_skip
         self._cross_attention_kwargs = cross_attention_kwargs
+        self._interrupt = False

         # 2. Define call parameters
-        if prompt is not None and isinstance(prompt, str):
+        if prompt is not None and isinstance(prompt, (str, dict)):
             batch_size = 1
         elif prompt is not None and isinstance(prompt, list):
             batch_size = len(prompt)
@@ -884,51 +891,29 @@ class AnimateDiffVideoToVideoPipeline(
             batch_size = prompt_embeds.shape[0]

         device = self._execution_device
+        dtype = self.dtype

-        # 3. Encode input prompt
-        text_encoder_lora_scale = (
-            self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None
-        )
-        prompt_embeds, negative_prompt_embeds = self.encode_prompt(
-            prompt,
-            device,
-            num_videos_per_prompt,
-            self.do_classifier_free_guidance,
-            negative_prompt,
-            prompt_embeds=prompt_embeds,
-            negative_prompt_embeds=negative_prompt_embeds,
-            lora_scale=text_encoder_lora_scale,
-            clip_skip=self.clip_skip,
-        )
-
-        # For classifier free guidance, we need to do two forward passes.
-        # Here we concatenate the unconditional and text embeddings into a single batch
-        # to avoid doing two forward passes
-        if self.do_classifier_free_guidance:
-            prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
-
-        if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
-            image_embeds = self.prepare_ip_adapter_image_embeds(
-                ip_adapter_image,
-                ip_adapter_image_embeds,
-                device,
-                batch_size * num_videos_per_prompt,
-                self.do_classifier_free_guidance,
+        # 3. Prepare timesteps
+        if not enforce_inference_steps:
+            timesteps, num_inference_steps = retrieve_timesteps(
+                self.scheduler, num_inference_steps, device, timesteps, sigmas
             )
+            timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, timesteps, strength, device)
+            latent_timestep = timesteps[:1].repeat(batch_size * num_videos_per_prompt)
+        else:
+            denoising_inference_steps = int(num_inference_steps / strength)
+            timesteps, denoising_inference_steps = retrieve_timesteps(
+                self.scheduler, denoising_inference_steps, device, timesteps, sigmas
+            )
+            timesteps = timesteps[-num_inference_steps:]
+            latent_timestep = timesteps[:1].repeat(batch_size * num_videos_per_prompt)

-        # 4. Prepare timesteps
-        timesteps, num_inference_steps = retrieve_timesteps(
-            self.scheduler, num_inference_steps, device, timesteps, sigmas
-        )
-        timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, timesteps, strength, device)
-        latent_timestep = timesteps[:1].repeat(batch_size * num_videos_per_prompt)
-
-        # 5. Prepare latent variables
+        # 4. Prepare latent variables
         if latents is None:
             video = self.video_processor.preprocess_video(video, height=height, width=width)
             # Move the number of frames before the number of channels.
             video = video.permute(0, 2, 1, 3, 4)
-            video = video.to(device=device, dtype=prompt_embeds.dtype)
+            video = video.to(device=device, dtype=dtype)
         num_channels_latents = self.unet.config.in_channels
         latents = self.prepare_latents(
             video=video,
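
The new `enforce_inference_steps` branch above honors the requested step count exactly: the scheduler is set up for `int(num_inference_steps / strength)` steps, only the trailing `num_inference_steps` of them are kept, and fresh noise is added to the latents at the first kept timestep via the new `add_noise` argument of `prepare_latents`. A worked example of the arithmetic, with values chosen arbitrarily:

num_inference_steps = 20
strength = 0.5

# Default path (enforce_inference_steps=False): get_timesteps truncates the schedule,
# so roughly num_inference_steps * strength = 10 denoising steps actually run.
# New path: build int(20 / 0.5) = 40 steps and keep the last 20 of them.
denoising_inference_steps = int(num_inference_steps / strength)       # 40
kept = list(range(denoising_inference_steps))[-num_inference_steps:]  # trailing 20 steps
print(denoising_inference_steps, len(kept))                           # 40 20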
@@ -937,17 +922,67 @@ class AnimateDiffVideoToVideoPipeline(
             num_channels_latents=num_channels_latents,
             batch_size=batch_size * num_videos_per_prompt,
             timestep=latent_timestep,
-            dtype=prompt_embeds.dtype,
+            dtype=dtype,
             device=device,
             generator=generator,
             latents=latents,
             decode_chunk_size=decode_chunk_size,
+            add_noise=enforce_inference_steps,
+        )
+
+        # 5. Encode input prompt
+        text_encoder_lora_scale = (
+            self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None
         )
+        num_frames = latents.shape[2]
+        if self.free_noise_enabled:
+            prompt_embeds, negative_prompt_embeds = self._encode_prompt_free_noise(
+                prompt=prompt,
+                num_frames=num_frames,
+                device=device,
+                num_videos_per_prompt=num_videos_per_prompt,
+                do_classifier_free_guidance=self.do_classifier_free_guidance,
+                negative_prompt=negative_prompt,
+                prompt_embeds=prompt_embeds,
+                negative_prompt_embeds=negative_prompt_embeds,
+                lora_scale=text_encoder_lora_scale,
+                clip_skip=self.clip_skip,
+            )
+        else:
+            prompt_embeds, negative_prompt_embeds = self.encode_prompt(
+                prompt,
+                device,
+                num_videos_per_prompt,
+                self.do_classifier_free_guidance,
+                negative_prompt,
+                prompt_embeds=prompt_embeds,
+                negative_prompt_embeds=negative_prompt_embeds,
+                lora_scale=text_encoder_lora_scale,
+                clip_skip=self.clip_skip,
+            )
+
+            # For classifier free guidance, we need to do two forward passes.
+            # Here we concatenate the unconditional and text embeddings into a single batch
+            # to avoid doing two forward passes
+            if self.do_classifier_free_guidance:
+                prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
+
+            prompt_embeds = prompt_embeds.repeat_interleave(repeats=num_frames, dim=0)
+
+        # 6. Prepare IP-Adapter embeddings
+        if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
+            image_embeds = self.prepare_ip_adapter_image_embeds(
+                ip_adapter_image,
+                ip_adapter_image_embeds,
+                device,
+                batch_size * num_videos_per_prompt,
+                self.do_classifier_free_guidance,
+            )

-        # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
+        # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)

-        # 7. Add image embeds for IP-Adapter
+        # 8. Add image embeds for IP-Adapter
         added_cond_kwargs = (
             {"image_embeds": image_embeds}
             if ip_adapter_image is not None or ip_adapter_image_embeds is not None
@@ -967,9 +1002,12 @@ class AnimateDiffVideoToVideoPipeline(
         self._num_timesteps = len(timesteps)
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order

-        # 8. Denoising loop
+        # 9. Denoising loop
         with self.progress_bar(total=self._num_timesteps) as progress_bar:
             for i, t in enumerate(timesteps):
+                if self.interrupt:
+                    continue
+
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
@@ -1005,14 +1043,14 @@ class AnimateDiffVideoToVideoPipeline(
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()

-        # 9. Post-processing
+        # 10. Post-processing
         if output_type == "latent":
             video = latents
         else:
             video_tensor = self.decode_latents(latents, decode_chunk_size)
             video = self.video_processor.postprocess_video(video=video_tensor, output_type=output_type)

-        # 10. Offload all models
+        # 11. Offload all models
         self.maybe_free_model_hooks()

         if not return_dict:
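
Taken together, these hunks let `prompt` be a dict of frame-index to prompt (routed through `_encode_prompt_free_noise` when FreeNoise is enabled), add the `enforce_inference_steps` flag, and expose an `interrupt` property that a `callback_on_step_end` hook can trigger by setting `pipe._interrupt = True` to skip the remaining denoising steps. A usage sketch with assumed checkpoints and an assumed input clip; none of these names come from the diff:

import torch
from diffusers import AnimateDiffVideoToVideoPipeline, MotionAdapter
from diffusers.utils import export_to_gif, load_video

# Illustrative checkpoints: any SD 1.5 base plus an AnimateDiff motion adapter should work.
adapter = MotionAdapter.from_pretrained(
    "guoyww/animatediff-motion-adapter-v1-5-2", torch_dtype=torch.float16
)
pipe = AnimateDiffVideoToVideoPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V5.1_noVAE", motion_adapter=adapter, torch_dtype=torch.float16
).to("cuda")

# FreeNoise enables the dict-prompt path added in this diff (frame index -> prompt).
pipe.enable_free_noise(context_length=16, context_stride=4)

video = load_video("input.mp4")  # placeholder path
output = pipe(
    video=video,
    prompt={0: "a panda surfing, photorealistic", 32: "a panda surfing at sunset, photorealistic"},
    negative_prompt="low quality, blurry",
    strength=0.6,
    num_inference_steps=20,
    # New in 0.31.0: run exactly 20 denoising steps instead of truncating them by `strength`.
    enforce_inference_steps=True,
)
export_to_gif(output.frames[0], "output.gif")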