diffusers 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (478)
  1. diffusers/__init__.py +48 -1
  2. diffusers/commands/__init__.py +1 -1
  3. diffusers/commands/diffusers_cli.py +1 -1
  4. diffusers/commands/env.py +1 -1
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/dependency_versions_check.py +1 -1
  7. diffusers/dependency_versions_table.py +1 -1
  8. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  9. diffusers/hooks/faster_cache.py +2 -2
  10. diffusers/hooks/group_offloading.py +128 -29
  11. diffusers/hooks/hooks.py +2 -2
  12. diffusers/hooks/layerwise_casting.py +3 -3
  13. diffusers/hooks/pyramid_attention_broadcast.py +1 -1
  14. diffusers/image_processor.py +7 -2
  15. diffusers/loaders/__init__.py +4 -0
  16. diffusers/loaders/ip_adapter.py +5 -14
  17. diffusers/loaders/lora_base.py +212 -111
  18. diffusers/loaders/lora_conversion_utils.py +275 -34
  19. diffusers/loaders/lora_pipeline.py +1554 -819
  20. diffusers/loaders/peft.py +52 -109
  21. diffusers/loaders/single_file.py +2 -2
  22. diffusers/loaders/single_file_model.py +20 -4
  23. diffusers/loaders/single_file_utils.py +225 -5
  24. diffusers/loaders/textual_inversion.py +3 -2
  25. diffusers/loaders/transformer_flux.py +1 -1
  26. diffusers/loaders/transformer_sd3.py +2 -2
  27. diffusers/loaders/unet.py +2 -16
  28. diffusers/loaders/unet_loader_utils.py +1 -1
  29. diffusers/loaders/utils.py +1 -1
  30. diffusers/models/__init__.py +15 -1
  31. diffusers/models/activations.py +5 -5
  32. diffusers/models/adapter.py +2 -3
  33. diffusers/models/attention.py +4 -4
  34. diffusers/models/attention_flax.py +10 -10
  35. diffusers/models/attention_processor.py +14 -10
  36. diffusers/models/auto_model.py +47 -10
  37. diffusers/models/autoencoders/__init__.py +1 -0
  38. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  39. diffusers/models/autoencoders/autoencoder_dc.py +3 -3
  40. diffusers/models/autoencoders/autoencoder_kl.py +4 -4
  41. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  42. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  43. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
  44. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  45. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  46. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  47. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  48. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  49. diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
  50. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  52. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  53. diffusers/models/autoencoders/vae.py +13 -2
  54. diffusers/models/autoencoders/vq_model.py +2 -2
  55. diffusers/models/cache_utils.py +1 -1
  56. diffusers/models/controlnet.py +1 -1
  57. diffusers/models/controlnet_flux.py +1 -1
  58. diffusers/models/controlnet_sd3.py +1 -1
  59. diffusers/models/controlnet_sparsectrl.py +1 -1
  60. diffusers/models/controlnets/__init__.py +1 -0
  61. diffusers/models/controlnets/controlnet.py +3 -3
  62. diffusers/models/controlnets/controlnet_flax.py +1 -1
  63. diffusers/models/controlnets/controlnet_flux.py +16 -15
  64. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  65. diffusers/models/controlnets/controlnet_sana.py +290 -0
  66. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  67. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  68. diffusers/models/controlnets/controlnet_union.py +1 -1
  69. diffusers/models/controlnets/controlnet_xs.py +7 -7
  70. diffusers/models/controlnets/multicontrolnet.py +4 -5
  71. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  72. diffusers/models/downsampling.py +2 -2
  73. diffusers/models/embeddings.py +10 -12
  74. diffusers/models/embeddings_flax.py +2 -2
  75. diffusers/models/lora.py +3 -3
  76. diffusers/models/modeling_utils.py +44 -14
  77. diffusers/models/normalization.py +4 -4
  78. diffusers/models/resnet.py +2 -2
  79. diffusers/models/resnet_flax.py +1 -1
  80. diffusers/models/transformers/__init__.py +5 -0
  81. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  82. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  83. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  84. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  85. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  86. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  87. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  88. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  89. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  90. diffusers/models/transformers/prior_transformer.py +1 -1
  91. diffusers/models/transformers/sana_transformer.py +8 -3
  92. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  93. diffusers/models/transformers/t5_film_transformer.py +3 -3
  94. diffusers/models/transformers/transformer_2d.py +1 -1
  95. diffusers/models/transformers/transformer_allegro.py +1 -1
  96. diffusers/models/transformers/transformer_chroma.py +742 -0
  97. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  98. diffusers/models/transformers/transformer_cogview4.py +317 -25
  99. diffusers/models/transformers/transformer_cosmos.py +579 -0
  100. diffusers/models/transformers/transformer_flux.py +9 -11
  101. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  102. diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
  103. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  104. diffusers/models/transformers/transformer_ltx.py +2 -2
  105. diffusers/models/transformers/transformer_lumina2.py +1 -1
  106. diffusers/models/transformers/transformer_mochi.py +1 -1
  107. diffusers/models/transformers/transformer_omnigen.py +2 -2
  108. diffusers/models/transformers/transformer_sd3.py +7 -7
  109. diffusers/models/transformers/transformer_temporal.py +1 -1
  110. diffusers/models/transformers/transformer_wan.py +24 -8
  111. diffusers/models/transformers/transformer_wan_vace.py +393 -0
  112. diffusers/models/unets/unet_1d.py +1 -1
  113. diffusers/models/unets/unet_1d_blocks.py +1 -1
  114. diffusers/models/unets/unet_2d.py +1 -1
  115. diffusers/models/unets/unet_2d_blocks.py +1 -1
  116. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  117. diffusers/models/unets/unet_2d_condition.py +2 -2
  118. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  119. diffusers/models/unets/unet_3d_blocks.py +1 -1
  120. diffusers/models/unets/unet_3d_condition.py +3 -3
  121. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  122. diffusers/models/unets/unet_kandinsky3.py +1 -1
  123. diffusers/models/unets/unet_motion_model.py +2 -2
  124. diffusers/models/unets/unet_stable_cascade.py +1 -1
  125. diffusers/models/upsampling.py +2 -2
  126. diffusers/models/vae_flax.py +2 -2
  127. diffusers/models/vq_model.py +1 -1
  128. diffusers/pipelines/__init__.py +37 -6
  129. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  130. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  131. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  132. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  133. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  134. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  135. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  136. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  137. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  138. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  139. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  140. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  141. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
  142. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  143. diffusers/pipelines/auto_pipeline.py +6 -7
  144. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  145. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  146. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  147. diffusers/pipelines/chroma/__init__.py +49 -0
  148. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  149. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  150. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  151. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
  152. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
  153. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
  154. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
  155. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  156. diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
  157. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  158. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  159. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  160. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  161. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  162. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
  163. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  164. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  165. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  166. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  167. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  168. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
  169. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
  170. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
  171. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  172. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  173. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  174. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  175. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  176. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  177. diffusers/pipelines/cosmos/__init__.py +54 -0
  178. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  179. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  180. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  181. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  182. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  183. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  184. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  185. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  186. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  187. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  188. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  189. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  190. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  191. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  192. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  193. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  194. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  195. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  196. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  197. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  198. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  199. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  200. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  201. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  202. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  203. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  204. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
  205. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  206. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  207. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  208. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  209. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  210. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  211. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  212. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  213. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  214. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  215. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  216. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  217. diffusers/pipelines/dit/pipeline_dit.py +1 -1
  218. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  219. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  220. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  221. diffusers/pipelines/flux/modeling_flux.py +1 -1
  222. diffusers/pipelines/flux/pipeline_flux.py +10 -17
  223. diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
  224. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
  225. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
  226. diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
  227. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
  228. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
  229. diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
  230. diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
  231. diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
  232. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  233. diffusers/pipelines/free_init_utils.py +2 -2
  234. diffusers/pipelines/free_noise_utils.py +3 -3
  235. diffusers/pipelines/hidream_image/__init__.py +47 -0
  236. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  237. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  238. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  239. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  240. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
  241. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  242. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  243. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  244. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  245. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  246. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  247. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  248. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  249. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  250. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  251. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  252. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  253. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  254. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  255. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  256. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  257. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  258. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  259. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  260. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  261. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  262. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  263. diffusers/pipelines/kolors/text_encoder.py +3 -3
  264. diffusers/pipelines/kolors/tokenizer.py +1 -1
  265. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  266. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  267. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  268. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  269. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  270. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  271. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  272. diffusers/pipelines/ltx/__init__.py +4 -0
  273. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  274. diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
  275. diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
  276. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
  277. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  278. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  279. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  280. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  281. diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
  282. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  283. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  284. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  285. diffusers/pipelines/onnx_utils.py +15 -2
  286. diffusers/pipelines/pag/pag_utils.py +2 -2
  287. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  288. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  289. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  290. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  291. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  292. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  293. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  294. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  295. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  296. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  297. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  298. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  299. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  300. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  301. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  302. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  303. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  304. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  305. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  306. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  307. diffusers/pipelines/pipeline_flax_utils.py +3 -4
  308. diffusers/pipelines/pipeline_loading_utils.py +89 -13
  309. diffusers/pipelines/pipeline_utils.py +105 -33
  310. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
  311. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
  312. diffusers/pipelines/sana/__init__.py +4 -0
  313. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  314. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  315. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  316. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  317. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  318. diffusers/pipelines/shap_e/camera.py +1 -1
  319. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  320. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  321. diffusers/pipelines/shap_e/renderer.py +3 -3
  322. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  323. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  324. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  325. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  326. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  327. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  328. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  329. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  330. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  331. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  332. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  333. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
  334. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  335. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
  336. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
  337. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
  338. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  339. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  340. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  341. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  342. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  343. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  344. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  345. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  346. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  347. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  348. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  349. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  350. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
  351. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  352. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  353. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  354. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  355. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  356. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  357. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  358. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  359. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  360. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  361. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  362. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  363. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  364. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  365. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  366. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  367. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  368. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  369. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  370. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  371. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  372. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  373. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  374. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  375. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  376. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  377. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  378. diffusers/pipelines/unclip/text_proj.py +2 -2
  379. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  380. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  381. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  382. diffusers/pipelines/visualcloze/__init__.py +52 -0
  383. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  384. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  385. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  386. diffusers/pipelines/wan/__init__.py +2 -0
  387. diffusers/pipelines/wan/pipeline_wan.py +17 -12
  388. diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
  389. diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
  390. diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
  391. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  392. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  393. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  394. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  395. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  396. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  397. diffusers/quantizers/__init__.py +179 -1
  398. diffusers/quantizers/base.py +6 -1
  399. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  400. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  401. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  402. diffusers/quantizers/gguf/utils.py +16 -13
  403. diffusers/quantizers/quantization_config.py +18 -16
  404. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  405. diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
  406. diffusers/schedulers/__init__.py +3 -1
  407. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  408. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  409. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  410. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  411. diffusers/schedulers/scheduling_ddim.py +8 -8
  412. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  413. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  414. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  415. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  416. diffusers/schedulers/scheduling_ddpm.py +9 -9
  417. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  418. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  419. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  420. diffusers/schedulers/scheduling_deis_multistep.py +8 -8
  421. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  422. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
  423. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  424. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  425. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  426. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
  427. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  428. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  429. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  430. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  431. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  432. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  433. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  434. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  435. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  436. diffusers/schedulers/scheduling_ipndm.py +2 -2
  437. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  438. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  439. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  440. diffusers/schedulers/scheduling_lcm.py +3 -3
  441. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  442. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  443. diffusers/schedulers/scheduling_pndm.py +4 -4
  444. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  445. diffusers/schedulers/scheduling_repaint.py +9 -9
  446. diffusers/schedulers/scheduling_sasolver.py +15 -15
  447. diffusers/schedulers/scheduling_scm.py +1 -1
  448. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  449. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  450. diffusers/schedulers/scheduling_tcd.py +3 -3
  451. diffusers/schedulers/scheduling_unclip.py +5 -5
  452. diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
  453. diffusers/schedulers/scheduling_utils.py +1 -1
  454. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  455. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  456. diffusers/training_utils.py +13 -5
  457. diffusers/utils/__init__.py +5 -0
  458. diffusers/utils/accelerate_utils.py +1 -1
  459. diffusers/utils/doc_utils.py +1 -1
  460. diffusers/utils/dummy_pt_objects.py +120 -0
  461. diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
  462. diffusers/utils/dynamic_modules_utils.py +21 -3
  463. diffusers/utils/export_utils.py +1 -1
  464. diffusers/utils/import_utils.py +81 -18
  465. diffusers/utils/logging.py +1 -1
  466. diffusers/utils/outputs.py +2 -1
  467. diffusers/utils/peft_utils.py +91 -8
  468. diffusers/utils/state_dict_utils.py +20 -3
  469. diffusers/utils/testing_utils.py +59 -7
  470. diffusers/utils/torch_utils.py +25 -5
  471. diffusers/video_processor.py +2 -2
  472. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
  473. diffusers-0.34.0.dist-info/RECORD +639 -0
  474. diffusers-0.33.0.dist-info/RECORD +0 -608
  475. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
  476. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
  477. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
  478. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel
24
24
  from ...schedulers import KarrasDiffusionSchedulers
25
25
  from ...utils import is_torch_xla_available, logging, replace_example_docstring
26
26
  from ...utils.torch_utils import randn_tensor
27
- from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin
27
+ from ..pipeline_utils import AudioPipelineOutput, DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
28
28
 
29
29
 
30
30
  if is_torch_xla_available():
@@ -57,7 +57,7 @@ EXAMPLE_DOC_STRING = """
57
57
  """
58
58
 
59
59
 
60
- class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
60
+ class AudioLDMPipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
61
61
  r"""
62
62
  Pipeline for text-to-audio generation using AudioLDM.
63
63
 
@@ -81,6 +81,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
81
81
  Vocoder of class `SpeechT5HifiGan`.
82
82
  """
83
83
 
84
+ _last_supported_version = "0.33.1"
84
85
  model_cpu_offload_seq = "text_encoder->unet->vae"
85
86
 
86
87
  def __init__(
@@ -261,7 +262,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
261
262
  def prepare_extra_step_kwargs(self, generator, eta):
262
263
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
263
264
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
264
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
265
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
265
266
  # and should be between [0, 1]
266
267
 
267
268
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -397,8 +398,8 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
397
398
  num_waveforms_per_prompt (`int`, *optional*, defaults to 1):
398
399
  The number of waveforms to generate per prompt.
399
400
  eta (`float`, *optional*, defaults to 0.0):
400
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
401
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
401
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
402
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
402
403
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
403
404
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
404
405
  generation deterministic.
@@ -472,7 +473,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
472
473
 
473
474
  device = self._execution_device
474
475
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
475
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
476
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
476
477
  # corresponds to doing no classifier free guidance.
477
478
  do_classifier_free_guidance = guidance_scale > 1.0
478
479
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 CVSSP, ByteDance and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 CVSSP, ByteDance and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -40,7 +40,8 @@ from ...utils import (
40
40
  logging,
41
41
  replace_example_docstring,
42
42
  )
43
- from ...utils.torch_utils import randn_tensor
43
+ from ...utils.import_utils import is_transformers_version
44
+ from ...utils.torch_utils import empty_device_cache, randn_tensor
44
45
  from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline
45
46
  from .modeling_audioldm2 import AudioLDM2ProjectionModel, AudioLDM2UNet2DConditionModel
46
47
 
@@ -266,9 +267,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
266
267
 
267
268
  if self.device.type != "cpu":
268
269
  self.to("cpu", silence_dtype_warnings=True)
269
- device_mod = getattr(torch, device.type, None)
270
- if hasattr(device_mod, "empty_cache") and device_mod.is_available():
271
- device_mod.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
270
+ empty_device_cache(device.type)
272
271
 
273
272
  model_sequence = [
274
273
  self.text_encoder.text_model,
@@ -312,8 +311,19 @@ class AudioLDM2Pipeline(DiffusionPipeline):
312
311
  `inputs_embeds (`torch.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
313
312
  The sequence of generated hidden-states.
314
313
  """
314
+ cache_position_kwargs = {}
315
+ if is_transformers_version("<", "4.52.0.dev0"):
316
+ cache_position_kwargs["input_ids"] = inputs_embeds
317
+ cache_position_kwargs["model_kwargs"] = model_kwargs
318
+ else:
319
+ cache_position_kwargs["seq_length"] = inputs_embeds.shape[0]
320
+ cache_position_kwargs["device"] = (
321
+ self.language_model.device if getattr(self, "language_model", None) is not None else self.device
322
+ )
323
+ cache_position_kwargs["model_kwargs"] = model_kwargs
315
324
  max_new_tokens = max_new_tokens if max_new_tokens is not None else self.language_model.config.max_new_tokens
316
- model_kwargs = self.language_model._get_initial_cache_position(inputs_embeds, model_kwargs)
325
+ model_kwargs = self.language_model._get_initial_cache_position(**cache_position_kwargs)
326
+
317
327
  for _ in range(max_new_tokens):
318
328
  # prepare model inputs
319
329
  model_inputs = prepare_inputs_for_generation(inputs_embeds, **model_kwargs)
@@ -373,7 +383,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
373
383
  *e.g.* prompt weighting. If not provided, negative_prompt_embeds will be computed from
374
384
  `negative_prompt` input argument.
375
385
  generated_prompt_embeds (`torch.Tensor`, *optional*):
376
- Pre-generated text embeddings from the GPT2 langauge model. Can be used to easily tweak text inputs,
386
+ Pre-generated text embeddings from the GPT2 language model. Can be used to easily tweak text inputs,
377
387
  *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input
378
388
  argument.
379
389
  negative_generated_prompt_embeds (`torch.Tensor`, *optional*):
@@ -394,7 +404,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
394
404
  attention_mask (`torch.LongTensor`):
395
405
  Attention mask to be applied to the `prompt_embeds`.
396
406
  generated_prompt_embeds (`torch.Tensor`):
397
- Text embeddings generated from the GPT2 langauge model.
407
+ Text embeddings generated from the GPT2 language model.
398
408
 
399
409
  Example:
400
410
 
@@ -701,7 +711,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
701
711
  def prepare_extra_step_kwargs(self, generator, eta):
702
712
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
703
713
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
704
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
714
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
705
715
  # and should be between [0, 1]
706
716
 
707
717
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -888,8 +898,8 @@ class AudioLDM2Pipeline(DiffusionPipeline):
888
898
  generated waveforms based on their cosine similarity with the text input in the joint text-audio
889
899
  embedding space.
890
900
  eta (`float`, *optional*, defaults to 0.0):
891
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
892
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
901
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
902
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
893
903
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
894
904
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
895
905
  generation deterministic.
@@ -904,7 +914,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
904
914
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
905
915
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
906
916
  generated_prompt_embeds (`torch.Tensor`, *optional*):
907
- Pre-generated text embeddings from the GPT2 langauge model. Can be used to easily tweak text inputs,
917
+ Pre-generated text embeddings from the GPT2 language model. Can be used to easily tweak text inputs,
908
918
  *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input
909
919
  argument.
910
920
  negative_generated_prompt_embeds (`torch.Tensor`, *optional*):
@@ -987,7 +997,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
987
997
 
988
998
  device = self._execution_device
989
999
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
990
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
1000
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
991
1001
  # corresponds to doing no classifier free guidance.
992
1002
  do_classifier_free_guidance = guidance_scale > 1.0
993
1003
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 AuraFlow Authors and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 AuraFlow Authors and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -12,17 +12,25 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  import inspect
15
- from typing import Callable, Dict, List, Optional, Tuple, Union
15
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
16
16
 
17
17
  import torch
18
18
  from transformers import T5Tokenizer, UMT5EncoderModel
19
19
 
20
20
  from ...callbacks import MultiPipelineCallbacks, PipelineCallback
21
21
  from ...image_processor import VaeImageProcessor
22
+ from ...loaders import AuraFlowLoraLoaderMixin
22
23
  from ...models import AuraFlowTransformer2DModel, AutoencoderKL
23
24
  from ...models.attention_processor import AttnProcessor2_0, FusedAttnProcessor2_0, XFormersAttnProcessor
24
25
  from ...schedulers import FlowMatchEulerDiscreteScheduler
25
- from ...utils import is_torch_xla_available, logging, replace_example_docstring
26
+ from ...utils import (
27
+ USE_PEFT_BACKEND,
28
+ is_torch_xla_available,
29
+ logging,
30
+ replace_example_docstring,
31
+ scale_lora_layers,
32
+ unscale_lora_layers,
33
+ )
26
34
  from ...utils.torch_utils import randn_tensor
27
35
  from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
28
36
 
@@ -112,7 +120,7 @@ def retrieve_timesteps(
112
120
  return timesteps, num_inference_steps
113
121
 
114
122
 
115
- class AuraFlowPipeline(DiffusionPipeline):
123
+ class AuraFlowPipeline(DiffusionPipeline, AuraFlowLoraLoaderMixin):
116
124
  r"""
117
125
  Args:
118
126
  tokenizer (`T5TokenizerFast`):
@@ -233,6 +241,7 @@ class AuraFlowPipeline(DiffusionPipeline):
233
241
  prompt_attention_mask: Optional[torch.Tensor] = None,
234
242
  negative_prompt_attention_mask: Optional[torch.Tensor] = None,
235
243
  max_sequence_length: int = 256,
244
+ lora_scale: Optional[float] = None,
236
245
  ):
237
246
  r"""
238
247
  Encodes the prompt into text encoder hidden states.
@@ -259,10 +268,20 @@ class AuraFlowPipeline(DiffusionPipeline):
259
268
  negative_prompt_attention_mask (`torch.Tensor`, *optional*):
260
269
  Pre-generated attention mask for negative text embeddings.
261
270
  max_sequence_length (`int`, defaults to 256): Maximum sequence length to use for the prompt.
271
+ lora_scale (`float`, *optional*):
272
+ A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
262
273
  """
274
+ # set lora scale so that monkey patched LoRA
275
+ # function of text encoder can correctly access it
276
+ if lora_scale is not None and isinstance(self, AuraFlowLoraLoaderMixin):
277
+ self._lora_scale = lora_scale
278
+
279
+ # dynamically adjust the LoRA scale
280
+ if self.text_encoder is not None and USE_PEFT_BACKEND:
281
+ scale_lora_layers(self.text_encoder, lora_scale)
282
+
263
283
  if device is None:
264
284
  device = self._execution_device
265
-
266
285
  if prompt is not None and isinstance(prompt, str):
267
286
  batch_size = 1
268
287
  elif prompt is not None and isinstance(prompt, list):
@@ -346,6 +365,11 @@ class AuraFlowPipeline(DiffusionPipeline):
346
365
  negative_prompt_embeds = None
347
366
  negative_prompt_attention_mask = None
348
367
 
368
+ if self.text_encoder is not None:
369
+ if isinstance(self, AuraFlowLoraLoaderMixin) and USE_PEFT_BACKEND:
370
+ # Retrieve the original scale by scaling back the LoRA layers
371
+ unscale_lora_layers(self.text_encoder, lora_scale)
372
+
349
373
  return prompt_embeds, prompt_attention_mask, negative_prompt_embeds, negative_prompt_attention_mask
350
374
 
351
375
  # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3.StableDiffusion3Pipeline.prepare_latents
@@ -403,6 +427,10 @@ class AuraFlowPipeline(DiffusionPipeline):
403
427
  def guidance_scale(self):
404
428
  return self._guidance_scale
405
429
 
430
+ @property
431
+ def attention_kwargs(self):
432
+ return self._attention_kwargs
433
+
406
434
  @property
407
435
  def num_timesteps(self):
408
436
  return self._num_timesteps
@@ -428,6 +456,7 @@ class AuraFlowPipeline(DiffusionPipeline):
428
456
  max_sequence_length: int = 256,
429
457
  output_type: Optional[str] = "pil",
430
458
  return_dict: bool = True,
459
+ attention_kwargs: Optional[Dict[str, Any]] = None,
431
460
  callback_on_step_end: Optional[
432
461
  Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
433
462
  ] = None,
@@ -455,11 +484,11 @@ class AuraFlowPipeline(DiffusionPipeline):
455
484
  Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
456
485
  `num_inference_steps` and `timesteps` must be `None`.
457
486
  guidance_scale (`float`, *optional*, defaults to 5.0):
458
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
459
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
460
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
461
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
462
- usually at the expense of lower image quality.
487
+ Guidance scale as defined in [Classifier-Free Diffusion
488
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
489
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
490
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
491
+ the text `prompt`, usually at the expense of lower image quality.
463
492
  num_images_per_prompt (`int`, *optional*, defaults to 1):
464
493
  The number of images to generate per prompt.
465
494
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -486,6 +515,10 @@ class AuraFlowPipeline(DiffusionPipeline):
486
515
  return_dict (`bool`, *optional*, defaults to `True`):
487
516
  Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
488
517
  of a plain tuple.
518
+ attention_kwargs (`dict`, *optional*):
519
+ A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
520
+ `self.processor` in
521
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
489
522
  callback_on_step_end (`Callable`, *optional*):
490
523
  A function that calls at the end of each denoising steps during the inference. The function is called
491
524
  with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
@@ -520,6 +553,7 @@ class AuraFlowPipeline(DiffusionPipeline):
520
553
  )
521
554
 
522
555
  self._guidance_scale = guidance_scale
556
+ self._attention_kwargs = attention_kwargs
523
557
 
524
558
  # 2. Determine batch size.
525
559
  if prompt is not None and isinstance(prompt, str):
@@ -530,9 +564,10 @@ class AuraFlowPipeline(DiffusionPipeline):
530
564
  batch_size = prompt_embeds.shape[0]
531
565
 
532
566
  device = self._execution_device
567
+ lora_scale = self.attention_kwargs.get("scale", None) if self.attention_kwargs is not None else None
533
568
 
534
569
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
535
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
570
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
536
571
  # corresponds to doing no classifier free guidance.
537
572
  do_classifier_free_guidance = guidance_scale > 1.0
538
573
 
@@ -553,6 +588,7 @@ class AuraFlowPipeline(DiffusionPipeline):
553
588
  prompt_attention_mask=prompt_attention_mask,
554
589
  negative_prompt_attention_mask=negative_prompt_attention_mask,
555
590
  max_sequence_length=max_sequence_length,
591
+ lora_scale=lora_scale,
556
592
  )
557
593
  if do_classifier_free_guidance:
558
594
  prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
@@ -594,6 +630,7 @@ class AuraFlowPipeline(DiffusionPipeline):
594
630
  encoder_hidden_states=prompt_embeds,
595
631
  timestep=timestep,
596
632
  return_dict=False,
633
+ attention_kwargs=self.attention_kwargs,
597
634
  )[0]
598
635
 
599
636
  # perform guidance
@@ -21,6 +21,7 @@ from ..configuration_utils import ConfigMixin
21
21
  from ..models.controlnets import ControlNetUnionModel
22
22
  from ..utils import is_sentencepiece_available
23
23
  from .aura_flow import AuraFlowPipeline
24
+ from .chroma import ChromaPipeline
24
25
  from .cogview3 import CogView3PlusPipeline
25
26
  from .cogview4 import CogView4ControlPipeline, CogView4Pipeline
26
27
  from .controlnet import (
@@ -143,6 +144,7 @@ AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
143
144
  ("flux-controlnet", FluxControlNetPipeline),
144
145
  ("lumina", LuminaPipeline),
145
146
  ("lumina2", Lumina2Pipeline),
147
+ ("chroma", ChromaPipeline),
146
148
  ("cogview3", CogView3PlusPipeline),
147
149
  ("cogview4", CogView4Pipeline),
148
150
  ("cogview4-control", CogView4ControlPipeline),
@@ -322,9 +324,8 @@ class AutoPipelineForText2Image(ConfigMixin):
322
324
  - A path to a *directory* (for example `./my_pipeline_directory/`) containing pipeline weights
323
325
  saved using
324
326
  [`~DiffusionPipeline.save_pretrained`].
325
- torch_dtype (`str` or `torch.dtype`, *optional*):
326
- Override the default `torch.dtype` and load the model with another dtype. If "auto" is passed, the
327
- dtype is automatically derived from the model's weights.
327
+ torch_dtype (`torch.dtype`, *optional*):
328
+ Override the default `torch.dtype` and load the model with another dtype.
328
329
  force_download (`bool`, *optional*, defaults to `False`):
329
330
  Whether or not to force the (re-)download of the model weights and configuration files, overriding the
330
331
  cached versions if they exist.
@@ -619,8 +620,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
619
620
  saved using
620
621
  [`~DiffusionPipeline.save_pretrained`].
621
622
  torch_dtype (`str` or `torch.dtype`, *optional*):
622
- Override the default `torch.dtype` and load the model with another dtype. If "auto" is passed, the
623
- dtype is automatically derived from the model's weights.
623
+ Override the default `torch.dtype` and load the model with another dtype.
624
624
  force_download (`bool`, *optional*, defaults to `False`):
625
625
  Whether or not to force the (re-)download of the model weights and configuration files, overriding the
626
626
  cached versions if they exist.
@@ -930,8 +930,7 @@ class AutoPipelineForInpainting(ConfigMixin):
930
930
  saved using
931
931
  [`~DiffusionPipeline.save_pretrained`].
932
932
  torch_dtype (`str` or `torch.dtype`, *optional*):
933
- Override the default `torch.dtype` and load the model with another dtype. If "auto" is passed, the
934
- dtype is automatically derived from the model's weights.
933
+ Override the default `torch.dtype` and load the model with another dtype.
935
934
  force_download (`bool`, *optional*, defaults to `False`):
936
935
  Whether or not to force the (re-)download of the model weights and configuration files, overriding the
937
936
  cached versions if they exist.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,5 +1,5 @@
1
- # Copyright 2024 Salesforce.com, inc.
2
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Salesforce.com, inc.
2
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
5
  # you may not use this file except in compliance with the License.
@@ -1,5 +1,5 @@
1
- # Copyright 2024 Salesforce.com, inc.
2
- # Copyright 2024 The HuggingFace Team. All rights reserved.#
1
+ # Copyright 2025 Salesforce.com, inc.
2
+ # Copyright 2025 The HuggingFace Team. All rights reserved.#
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
5
5
  # You may obtain a copy of the License at
@@ -25,7 +25,7 @@ from ...utils import (
25
25
  replace_example_docstring,
26
26
  )
27
27
  from ...utils.torch_utils import randn_tensor
28
- from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
28
+ from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
29
29
  from .blip_image_processing import BlipImageProcessor
30
30
  from .modeling_blip2 import Blip2QFormerModel
31
31
  from .modeling_ctx_clip import ContextCLIPTextModel
@@ -81,7 +81,7 @@ EXAMPLE_DOC_STRING = """
81
81
  """
82
82
 
83
83
 
84
- class BlipDiffusionPipeline(DiffusionPipeline):
84
+ class BlipDiffusionPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
85
85
  """
86
86
  Pipeline for Zero-Shot Subject Driven Generation using Blip Diffusion.
87
87
 
@@ -107,6 +107,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
107
107
  Position of the context token in the text encoder.
108
108
  """
109
109
 
110
+ _last_supported_version = "0.33.1"
110
111
  model_cpu_offload_seq = "qformer->text_encoder->unet->vae"
111
112
 
112
113
  def __init__(
@@ -138,7 +139,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
138
139
  def get_query_embeddings(self, input_image, src_subject):
139
140
  return self.qformer(image_input=input_image, text_input=src_subject, return_dict=False)
140
141
 
141
- # from the original Blip Diffusion code, speciefies the target subject and augments the prompt by repeating it
142
+ # from the original Blip Diffusion code, specifies the target subject and augments the prompt by repeating it
142
143
  def _build_prompt(self, prompts, tgt_subjects, prompt_strength=1.0, prompt_reps=20):
143
144
  rv = []
144
145
  for prompt, tgt_subject in zip(prompts, tgt_subjects):
@@ -229,11 +230,11 @@ class BlipDiffusionPipeline(DiffusionPipeline):
229
230
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
230
231
  tensor will ge generated by random sampling.
231
232
  guidance_scale (`float`, *optional*, defaults to 7.5):
232
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
233
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
234
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
235
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
236
- usually at the expense of lower image quality.
233
+ Guidance scale as defined in [Classifier-Free Diffusion
234
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
235
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
236
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
237
+ the text `prompt`, usually at the expense of lower image quality.
237
238
  height (`int`, *optional*, defaults to 512):
238
239
  The height of the generated image.
239
240
  width (`int`, *optional*, defaults to 512):
@@ -0,0 +1,49 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from ...utils import (
4
+ DIFFUSERS_SLOW_IMPORT,
5
+ OptionalDependencyNotAvailable,
6
+ _LazyModule,
7
+ get_objects_from_module,
8
+ is_torch_available,
9
+ is_transformers_available,
10
+ )
11
+
12
+
13
+ _dummy_objects = {}
14
+ _additional_imports = {}
15
+ _import_structure = {"pipeline_output": ["ChromaPipelineOutput"]}
16
+
17
+ try:
18
+ if not (is_transformers_available() and is_torch_available()):
19
+ raise OptionalDependencyNotAvailable()
20
+ except OptionalDependencyNotAvailable:
21
+ from ...utils import dummy_torch_and_transformers_objects # noqa F403
22
+
23
+ _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
24
+ else:
25
+ _import_structure["pipeline_chroma"] = ["ChromaPipeline"]
26
+ _import_structure["pipeline_chroma_img2img"] = ["ChromaImg2ImgPipeline"]
27
+ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
28
+ try:
29
+ if not (is_transformers_available() and is_torch_available()):
30
+ raise OptionalDependencyNotAvailable()
31
+ except OptionalDependencyNotAvailable:
32
+ from ...utils.dummy_torch_and_transformers_objects import * # noqa F403
33
+ else:
34
+ from .pipeline_chroma import ChromaPipeline
35
+ from .pipeline_chroma_img2img import ChromaImg2ImgPipeline
36
+ else:
37
+ import sys
38
+
39
+ sys.modules[__name__] = _LazyModule(
40
+ __name__,
41
+ globals()["__file__"],
42
+ _import_structure,
43
+ module_spec=__spec__,
44
+ )
45
+
46
+ for name, value in _dummy_objects.items():
47
+ setattr(sys.modules[__name__], name, value)
48
+ for name, value in _additional_imports.items():
49
+ setattr(sys.modules[__name__], name, value)