diffusers 0.29.2__py3-none-any.whl → 0.30.0__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Files changed (220)
  1. diffusers/__init__.py +94 -3
  2. diffusers/commands/env.py +1 -5
  3. diffusers/configuration_utils.py +4 -9
  4. diffusers/dependency_versions_table.py +2 -2
  5. diffusers/image_processor.py +1 -2
  6. diffusers/loaders/__init__.py +17 -2
  7. diffusers/loaders/ip_adapter.py +10 -7
  8. diffusers/loaders/lora_base.py +752 -0
  9. diffusers/loaders/lora_pipeline.py +2222 -0
  10. diffusers/loaders/peft.py +213 -5
  11. diffusers/loaders/single_file.py +1 -12
  12. diffusers/loaders/single_file_model.py +31 -10
  13. diffusers/loaders/single_file_utils.py +262 -2
  14. diffusers/loaders/textual_inversion.py +1 -6
  15. diffusers/loaders/unet.py +23 -208
  16. diffusers/models/__init__.py +20 -0
  17. diffusers/models/activations.py +22 -0
  18. diffusers/models/attention.py +386 -7
  19. diffusers/models/attention_processor.py +1795 -629
  20. diffusers/models/autoencoders/__init__.py +2 -0
  21. diffusers/models/autoencoders/autoencoder_kl.py +14 -3
  22. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +1035 -0
  23. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +1 -1
  24. diffusers/models/autoencoders/autoencoder_oobleck.py +464 -0
  25. diffusers/models/autoencoders/autoencoder_tiny.py +1 -0
  26. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  27. diffusers/models/autoencoders/vq_model.py +4 -4
  28. diffusers/models/controlnet.py +2 -3
  29. diffusers/models/controlnet_hunyuan.py +401 -0
  30. diffusers/models/controlnet_sd3.py +11 -11
  31. diffusers/models/controlnet_sparsectrl.py +789 -0
  32. diffusers/models/controlnet_xs.py +40 -10
  33. diffusers/models/downsampling.py +68 -0
  34. diffusers/models/embeddings.py +319 -36
  35. diffusers/models/model_loading_utils.py +1 -3
  36. diffusers/models/modeling_flax_utils.py +1 -6
  37. diffusers/models/modeling_utils.py +4 -16
  38. diffusers/models/normalization.py +203 -12
  39. diffusers/models/transformers/__init__.py +6 -0
  40. diffusers/models/transformers/auraflow_transformer_2d.py +527 -0
  41. diffusers/models/transformers/cogvideox_transformer_3d.py +345 -0
  42. diffusers/models/transformers/hunyuan_transformer_2d.py +19 -15
  43. diffusers/models/transformers/latte_transformer_3d.py +327 -0
  44. diffusers/models/transformers/lumina_nextdit2d.py +340 -0
  45. diffusers/models/transformers/pixart_transformer_2d.py +102 -1
  46. diffusers/models/transformers/prior_transformer.py +1 -1
  47. diffusers/models/transformers/stable_audio_transformer.py +458 -0
  48. diffusers/models/transformers/transformer_flux.py +455 -0
  49. diffusers/models/transformers/transformer_sd3.py +18 -4
  50. diffusers/models/unets/unet_1d_blocks.py +1 -1
  51. diffusers/models/unets/unet_2d_condition.py +8 -1
  52. diffusers/models/unets/unet_3d_blocks.py +51 -920
  53. diffusers/models/unets/unet_3d_condition.py +4 -1
  54. diffusers/models/unets/unet_i2vgen_xl.py +4 -1
  55. diffusers/models/unets/unet_kandinsky3.py +1 -1
  56. diffusers/models/unets/unet_motion_model.py +1330 -84
  57. diffusers/models/unets/unet_spatio_temporal_condition.py +1 -1
  58. diffusers/models/unets/unet_stable_cascade.py +1 -3
  59. diffusers/models/unets/uvit_2d.py +1 -1
  60. diffusers/models/upsampling.py +64 -0
  61. diffusers/models/vq_model.py +8 -4
  62. diffusers/optimization.py +1 -1
  63. diffusers/pipelines/__init__.py +100 -3
  64. diffusers/pipelines/animatediff/__init__.py +4 -0
  65. diffusers/pipelines/animatediff/pipeline_animatediff.py +50 -40
  66. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +1076 -0
  67. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +17 -27
  68. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1008 -0
  69. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +51 -38
  70. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  71. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +1 -0
  72. diffusers/pipelines/aura_flow/__init__.py +48 -0
  73. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +591 -0
  74. diffusers/pipelines/auto_pipeline.py +97 -19
  75. diffusers/pipelines/cogvideo/__init__.py +48 -0
  76. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +687 -0
  77. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  78. diffusers/pipelines/controlnet/pipeline_controlnet.py +24 -30
  79. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +31 -30
  80. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +24 -153
  81. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +19 -28
  82. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +18 -28
  83. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +29 -32
  84. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
  85. diffusers/pipelines/controlnet_hunyuandit/__init__.py +48 -0
  86. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +1042 -0
  87. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +35 -0
  88. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +10 -6
  89. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +0 -4
  90. diffusers/pipelines/deepfloyd_if/pipeline_if.py +2 -2
  91. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +2 -2
  92. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +2 -2
  93. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +2 -2
  94. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +2 -2
  95. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +2 -2
  96. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -6
  97. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -6
  98. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +6 -6
  99. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
  100. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -10
  101. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +10 -6
  102. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +3 -3
  103. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
  104. diffusers/pipelines/flux/__init__.py +47 -0
  105. diffusers/pipelines/flux/pipeline_flux.py +749 -0
  106. diffusers/pipelines/flux/pipeline_output.py +21 -0
  107. diffusers/pipelines/free_init_utils.py +2 -0
  108. diffusers/pipelines/free_noise_utils.py +236 -0
  109. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +2 -2
  110. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +2 -2
  111. diffusers/pipelines/kolors/__init__.py +54 -0
  112. diffusers/pipelines/kolors/pipeline_kolors.py +1070 -0
  113. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +1247 -0
  114. diffusers/pipelines/kolors/pipeline_output.py +21 -0
  115. diffusers/pipelines/kolors/text_encoder.py +889 -0
  116. diffusers/pipelines/kolors/tokenizer.py +334 -0
  117. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +30 -29
  118. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +23 -29
  119. diffusers/pipelines/latte/__init__.py +48 -0
  120. diffusers/pipelines/latte/pipeline_latte.py +881 -0
  121. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +4 -4
  122. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +0 -4
  123. diffusers/pipelines/lumina/__init__.py +48 -0
  124. diffusers/pipelines/lumina/pipeline_lumina.py +897 -0
  125. diffusers/pipelines/pag/__init__.py +67 -0
  126. diffusers/pipelines/pag/pag_utils.py +237 -0
  127. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1329 -0
  128. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1612 -0
  129. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +953 -0
  130. diffusers/pipelines/pag/pipeline_pag_kolors.py +1136 -0
  131. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +872 -0
  132. diffusers/pipelines/pag/pipeline_pag_sd.py +1050 -0
  133. diffusers/pipelines/pag/pipeline_pag_sd_3.py +985 -0
  134. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +862 -0
  135. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +1333 -0
  136. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +1529 -0
  137. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +1753 -0
  138. diffusers/pipelines/pia/pipeline_pia.py +30 -37
  139. diffusers/pipelines/pipeline_flax_utils.py +4 -9
  140. diffusers/pipelines/pipeline_loading_utils.py +0 -3
  141. diffusers/pipelines/pipeline_utils.py +2 -14
  142. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +0 -1
  143. diffusers/pipelines/stable_audio/__init__.py +50 -0
  144. diffusers/pipelines/stable_audio/modeling_stable_audio.py +158 -0
  145. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +745 -0
  146. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +2 -0
  147. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  148. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +23 -29
  149. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +15 -8
  150. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +30 -29
  151. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +23 -152
  152. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +8 -4
  153. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +11 -11
  154. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +8 -6
  155. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +6 -6
  156. diffusers/pipelines/stable_diffusion_3/__init__.py +2 -0
  157. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +34 -3
  158. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +33 -7
  159. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +1201 -0
  160. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +3 -3
  161. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +6 -6
  162. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +5 -5
  163. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +5 -5
  164. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +6 -6
  165. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +0 -4
  166. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +23 -29
  167. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +27 -29
  168. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +3 -3
  169. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +17 -27
  170. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +26 -29
  171. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +17 -145
  172. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +0 -4
  173. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +6 -6
  174. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -28
  175. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +8 -6
  176. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +8 -6
  177. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +6 -4
  178. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +0 -4
  179. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
  180. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  181. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +5 -4
  182. diffusers/schedulers/__init__.py +8 -0
  183. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +572 -0
  184. diffusers/schedulers/scheduling_ddim.py +1 -1
  185. diffusers/schedulers/scheduling_ddim_cogvideox.py +449 -0
  186. diffusers/schedulers/scheduling_ddpm.py +1 -1
  187. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -1
  188. diffusers/schedulers/scheduling_deis_multistep.py +2 -2
  189. diffusers/schedulers/scheduling_dpm_cogvideox.py +489 -0
  190. diffusers/schedulers/scheduling_dpmsolver_multistep.py +1 -1
  191. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +1 -1
  192. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +64 -19
  193. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +2 -2
  194. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +63 -39
  195. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +321 -0
  196. diffusers/schedulers/scheduling_ipndm.py +1 -1
  197. diffusers/schedulers/scheduling_unipc_multistep.py +1 -1
  198. diffusers/schedulers/scheduling_utils.py +1 -3
  199. diffusers/schedulers/scheduling_utils_flax.py +1 -3
  200. diffusers/training_utils.py +99 -14
  201. diffusers/utils/__init__.py +2 -2
  202. diffusers/utils/dummy_pt_objects.py +210 -0
  203. diffusers/utils/dummy_torch_and_torchsde_objects.py +15 -0
  204. diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py +47 -0
  205. diffusers/utils/dummy_torch_and_transformers_objects.py +315 -0
  206. diffusers/utils/dynamic_modules_utils.py +1 -11
  207. diffusers/utils/export_utils.py +1 -4
  208. diffusers/utils/hub_utils.py +45 -42
  209. diffusers/utils/import_utils.py +19 -16
  210. diffusers/utils/loading_utils.py +76 -3
  211. diffusers/utils/testing_utils.py +11 -8
  212. {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/METADATA +73 -83
  213. {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/RECORD +217 -164
  214. {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/WHEEL +1 -1
  215. diffusers/loaders/autoencoder.py +0 -146
  216. diffusers/loaders/controlnet.py +0 -136
  217. diffusers/loaders/lora.py +0 -1728
  218. {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/LICENSE +0 -0
  219. {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/entry_points.txt +0 -0
  220. {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/top_level.txt +0 -0
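The headline changes in 0.30.0 are the new pipelines (AuraFlow, CogVideoX, Flux, Kolors, Latte, Lumina, Stable Audio, SparseCtrl, and the PAG family) and a reorganized LoRA loader: `diffusers/loaders/lora.py` is removed in favor of `lora_base.py` plus per-pipeline classes in `lora_pipeline.py`. For orientation, a minimal sketch of one newly exported pipeline, assuming a CUDA machine; the `fal/AuraFlow` checkpoint id is illustrative and not confirmed by this diff:

    import torch
    from diffusers import AuraFlowPipeline

    # AuraFlowPipeline is newly exported in 0.30.0 (see aura_flow/__init__.py below).
    pipe = AuraFlowPipeline.from_pretrained("fal/AuraFlow", torch_dtype=torch.float16).to("cuda")
    image = pipe(prompt="a watercolor fox in a meadow").images[0]
    image.save("fox.png")

Representative hunks from the changed files follow.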
diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
@@ -19,7 +19,7 @@ import torch
  from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

  from ...image_processor import PipelineImageInput
- from ...loaders import IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+ from ...loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
  from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
  from ...models.lora import adjust_lora_scale_text_encoder
  from ...models.unets.unet_motion_model import MotionAdapter
@@ -35,6 +35,7 @@ from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
  from ...utils.torch_utils import randn_tensor
  from ...video_processor import VideoProcessor
  from ..free_init_utils import FreeInitMixin
+ from ..free_noise_utils import AnimateDiffFreeNoiseMixin
  from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
  from .pipeline_output import AnimateDiffPipelineOutput

@@ -174,8 +175,9 @@ class AnimateDiffVideoToVideoPipeline(
      StableDiffusionMixin,
      TextualInversionLoaderMixin,
      IPAdapterMixin,
-     LoraLoaderMixin,
+     StableDiffusionLoraLoaderMixin,
      FreeInitMixin,
+     AnimateDiffFreeNoiseMixin,
  ):
      r"""
      Pipeline for video-to-video generation.
@@ -185,8 +187,8 @@ class AnimateDiffVideoToVideoPipeline(

      The pipeline also inherits the following loading methods:
          - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
-         - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
-         - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+         - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+         - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
          - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters

      Args:
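The `LoraLoaderMixin` to `StableDiffusionLoraLoaderMixin` rename runs through this whole file. Pipeline-level call sites are unaffected, since the methods are still inherited; only code that imports the mixin directly from `diffusers.loaders` needs the new name. A minimal sketch, assuming `pipe` is an already-constructed AnimateDiff pipeline and `input_frames` an existing list of frames; the LoRA repo id is illustrative:

    # User-facing API is unchanged: the pipeline still exposes load_lora_weights().
    pipe.load_lora_weights("someuser/some-animatediff-lora", adapter_name="style")
    # lora_scale still flows to the text encoder via cross_attention_kwargs.
    frames = pipe(
        video=input_frames,
        prompt="a scenic mountain lake",
        cross_attention_kwargs={"lora_scale": 0.8},
    ).frames[0]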
@@ -288,7 +290,7 @@ class AnimateDiffVideoToVideoPipeline(
          """
          # set lora scale so that monkey patched LoRA
          # function of text encoder can correctly access it
-         if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+         if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
              self._lora_scale = lora_scale

          # dynamically adjust the LoRA scale
@@ -421,7 +423,7 @@ class AnimateDiffVideoToVideoPipeline(
              negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

          if self.text_encoder is not None:
-             if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+             if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
                  # Retrieve the original scale by scaling back the LoRA layers
                  unscale_lora_layers(self.text_encoder, lora_scale)

@@ -456,6 +458,9 @@ class AnimateDiffVideoToVideoPipeline(
      def prepare_ip_adapter_image_embeds(
          self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
      ):
+         image_embeds = []
+         if do_classifier_free_guidance:
+             negative_image_embeds = []
          if ip_adapter_image_embeds is None:
              if not isinstance(ip_adapter_image, list):
                  ip_adapter_image = [ip_adapter_image]
@@ -465,7 +470,6 @@ class AnimateDiffVideoToVideoPipeline(
                      f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
                  )

-             image_embeds = []
              for single_ip_adapter_image, image_proj_layer in zip(
                  ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
              ):
@@ -473,46 +477,52 @@ class AnimateDiffVideoToVideoPipeline(
                  single_image_embeds, single_negative_image_embeds = self.encode_image(
                      single_ip_adapter_image, device, 1, output_hidden_state
                  )
-                 single_image_embeds = torch.stack([single_image_embeds] * num_images_per_prompt, dim=0)
-                 single_negative_image_embeds = torch.stack(
-                     [single_negative_image_embeds] * num_images_per_prompt, dim=0
-                 )

+                 image_embeds.append(single_image_embeds[None, :])
                  if do_classifier_free_guidance:
-                     single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
-                     single_image_embeds = single_image_embeds.to(device)
-
-                 image_embeds.append(single_image_embeds)
+                     negative_image_embeds.append(single_negative_image_embeds[None, :])
          else:
-             repeat_dims = [1]
-             image_embeds = []
              for single_image_embeds in ip_adapter_image_embeds:
                  if do_classifier_free_guidance:
                      single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
-                     single_image_embeds = single_image_embeds.repeat(
-                         num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
-                     )
-                     single_negative_image_embeds = single_negative_image_embeds.repeat(
-                         num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
-                     )
-                     single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
-                 else:
-                     single_image_embeds = single_image_embeds.repeat(
-                         num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
-                     )
+                     negative_image_embeds.append(single_negative_image_embeds)
                  image_embeds.append(single_image_embeds)

-         return image_embeds
+         ip_adapter_image_embeds = []
+         for i, single_image_embeds in enumerate(image_embeds):
+             single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
+             if do_classifier_free_guidance:
+                 single_negative_image_embeds = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
+                 single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds], dim=0)

-     # Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
-     def decode_latents(self, latents):
+             single_image_embeds = single_image_embeds.to(device=device)
+             ip_adapter_image_embeds.append(single_image_embeds)
+
+         return ip_adapter_image_embeds
+
+     def encode_video(self, video, generator, decode_chunk_size: int = 16) -> torch.Tensor:
+         latents = []
+         for i in range(0, len(video), decode_chunk_size):
+             batch_video = video[i : i + decode_chunk_size]
+             batch_video = retrieve_latents(self.vae.encode(batch_video), generator=generator)
+             latents.append(batch_video)
+         return torch.cat(latents)
+
+     # Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
+     def decode_latents(self, latents, decode_chunk_size: int = 16):
          latents = 1 / self.vae.config.scaling_factor * latents

          batch_size, channels, num_frames, height, width = latents.shape
          latents = latents.permute(0, 2, 1, 3, 4).reshape(batch_size * num_frames, channels, height, width)

-         image = self.vae.decode(latents).sample
-         video = image[None, :].reshape((batch_size, num_frames, -1) + image.shape[2:]).permute(0, 2, 1, 3, 4)
+         video = []
+         for i in range(0, latents.shape[0], decode_chunk_size):
+             batch_latents = latents[i : i + decode_chunk_size]
+             batch_latents = self.vae.decode(batch_latents).sample
+             video.append(batch_latents)
+
+         video = torch.cat(video)
+         video = video[None, :].reshape((batch_size, num_frames, -1) + video.shape[2:]).permute(0, 2, 1, 3, 4)
          # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
          video = video.float()
          return video
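Both the new `encode_video` and the rewritten `decode_latents` apply the same chunking idea: push at most `decode_chunk_size` frames through the VAE at once, then concatenate, trading a little speed for a much lower peak-memory profile on long videos. A standalone sketch of the pattern, with `vae` and `latents` as assumed stand-ins rather than pipeline attributes:

    import torch

    def decode_in_chunks(vae, latents: torch.Tensor, decode_chunk_size: int = 16) -> torch.Tensor:
        # latents: (num_frames, channels, height, width), already scaled.
        frames = []
        for i in range(0, latents.shape[0], decode_chunk_size):
            # Decode a bounded slice so peak activation memory stays flat.
            frames.append(vae.decode(latents[i : i + decode_chunk_size]).sample)
        return torch.cat(frames)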
@@ -628,6 +638,7 @@ class AnimateDiffVideoToVideoPipeline(
          device,
          generator,
          latents=None,
+         decode_chunk_size: int = 16,
      ):
          if latents is None:
              num_frames = video.shape[1]
@@ -662,13 +673,11 @@ class AnimateDiffVideoToVideoPipeline(
                  )

                  init_latents = [
-                     retrieve_latents(self.vae.encode(video[i]), generator=generator[i]).unsqueeze(0)
+                     self.encode_video(video[i], generator[i], decode_chunk_size).unsqueeze(0)
                      for i in range(batch_size)
                  ]
              else:
-                 init_latents = [
-                     retrieve_latents(self.vae.encode(vid), generator=generator).unsqueeze(0) for vid in video
-                 ]
+                 init_latents = [self.encode_video(vid, generator, decode_chunk_size).unsqueeze(0) for vid in video]

              init_latents = torch.cat(init_latents, dim=0)

@@ -753,6 +762,7 @@ class AnimateDiffVideoToVideoPipeline(
          clip_skip: Optional[int] = None,
          callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
          callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+         decode_chunk_size: int = 16,
      ):
          r"""
          The call function to the pipeline for generation.
@@ -828,6 +838,8 @@ class AnimateDiffVideoToVideoPipeline(
                  The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                  will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                  `._callback_tensor_inputs` attribute of your pipeline class.
+             decode_chunk_size (`int`, defaults to `16`):
+                 The number of frames to decode at a time when calling `decode_latents` method.

          Examples:

@@ -929,6 +941,7 @@ class AnimateDiffVideoToVideoPipeline(
              device=device,
              generator=generator,
              latents=latents,
+             decode_chunk_size=decode_chunk_size,
          )

          # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
@@ -996,7 +1009,7 @@ class AnimateDiffVideoToVideoPipeline(
          if output_type == "latent":
              video = latents
          else:
-             video_tensor = self.decode_latents(latents)
+             video_tensor = self.decode_latents(latents, decode_chunk_size)
              video = self.video_processor.postprocess_video(video=video_tensor, output_type=output_type)

          # 10. Offload all models
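End to end, `decode_chunk_size` now threads from `__call__` through `prepare_latents` (encoding) and `decode_latents` (decoding). A hedged usage sketch, assuming an AnimateDiff video-to-video setup; the model ids follow the diffusers documentation but are illustrative here, and `input_frames` is an assumed list of PIL frames:

    import torch
    from diffusers import AnimateDiffVideoToVideoPipeline, MotionAdapter

    adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
    pipe = AnimateDiffVideoToVideoPipeline.from_pretrained(
        "SG161222/Realistic_Vision_V5.1_noVAE", motion_adapter=adapter, torch_dtype=torch.float16
    ).to("cuda")

    output = pipe(
        video=input_frames,
        prompt="a panda surfing, high quality",
        strength=0.6,
        decode_chunk_size=8,  # new in 0.30.0: VAE processes 8 frames per batch
    )
    frames = output.frames[0]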
diffusers/pipelines/audioldm2/modeling_audioldm2.py
@@ -544,7 +544,7 @@ class AudioLDM2UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin):

          def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
              if hasattr(module, "get_processor"):
-                 processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)
+                 processors[f"{name}.processor"] = module.get_processor()

              for sub_name, child in module.named_children():
                  fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
diffusers/pipelines/audioldm2/pipeline_audioldm2.py
@@ -286,6 +286,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
                  The sequence of generated hidden-states.
          """
          max_new_tokens = max_new_tokens if max_new_tokens is not None else self.language_model.config.max_new_tokens
+         model_kwargs = self.language_model._get_initial_cache_position(inputs_embeds, model_kwargs)
          for _ in range(max_new_tokens):
              # prepare model inputs
              model_inputs = prepare_inputs_for_generation(inputs_embeds, **model_kwargs)
diffusers/pipelines/aura_flow/__init__.py (new file)
@@ -0,0 +1,48 @@
+ from typing import TYPE_CHECKING
+
+ from ...utils import (
+     DIFFUSERS_SLOW_IMPORT,
+     OptionalDependencyNotAvailable,
+     _LazyModule,
+     get_objects_from_module,
+     is_torch_available,
+     is_transformers_available,
+ )
+
+
+ _dummy_objects = {}
+ _import_structure = {}
+
+
+ try:
+     if not (is_transformers_available() and is_torch_available()):
+         raise OptionalDependencyNotAvailable()
+ except OptionalDependencyNotAvailable:
+     from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+ else:
+     _import_structure["pipeline_aura_flow"] = ["AuraFlowPipeline"]
+
+ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+     try:
+         if not (is_transformers_available() and is_torch_available()):
+             raise OptionalDependencyNotAvailable()
+
+     except OptionalDependencyNotAvailable:
+         from ...utils.dummy_torch_and_transformers_objects import *
+     else:
+         from .pipeline_aura_flow import AuraFlowPipeline
+
+ else:
+     import sys
+
+     sys.modules[__name__] = _LazyModule(
+         __name__,
+         globals()["__file__"],
+         _import_structure,
+         module_spec=__spec__,
+     )
+
+     for name, value in _dummy_objects.items():
+         setattr(sys.modules[__name__], name, value)
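This `__init__.py` is the standard diffusers lazy-import boilerplate: at normal import time the module is replaced by a `_LazyModule` that records `_import_structure` and only imports `pipeline_aura_flow` when `AuraFlowPipeline` is first accessed, substituting dummy objects when torch or transformers is missing. A small sketch of what that buys at the call site:

    # Importing the package is cheap; the heavy pipeline module loads lazily.
    import diffusers.pipelines.aura_flow as aura_flow

    # First attribute access triggers the real import (or raises a helpful
    # error via the dummy object if torch/transformers are unavailable).
    AuraFlowPipeline = aura_flow.AuraFlowPipeline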