diffusers 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (551)
  1. diffusers/__init__.py +145 -1
  2. diffusers/callbacks.py +35 -0
  3. diffusers/commands/__init__.py +1 -1
  4. diffusers/commands/custom_blocks.py +134 -0
  5. diffusers/commands/diffusers_cli.py +3 -1
  6. diffusers/commands/env.py +1 -1
  7. diffusers/commands/fp16_safetensors.py +2 -2
  8. diffusers/configuration_utils.py +11 -2
  9. diffusers/dependency_versions_check.py +1 -1
  10. diffusers/dependency_versions_table.py +3 -3
  11. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  12. diffusers/guiders/__init__.py +41 -0
  13. diffusers/guiders/adaptive_projected_guidance.py +188 -0
  14. diffusers/guiders/auto_guidance.py +190 -0
  15. diffusers/guiders/classifier_free_guidance.py +141 -0
  16. diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
  17. diffusers/guiders/frequency_decoupled_guidance.py +327 -0
  18. diffusers/guiders/guider_utils.py +309 -0
  19. diffusers/guiders/perturbed_attention_guidance.py +271 -0
  20. diffusers/guiders/skip_layer_guidance.py +262 -0
  21. diffusers/guiders/smoothed_energy_guidance.py +251 -0
  22. diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
  23. diffusers/hooks/__init__.py +17 -0
  24. diffusers/hooks/_common.py +56 -0
  25. diffusers/hooks/_helpers.py +293 -0
  26. diffusers/hooks/faster_cache.py +9 -8
  27. diffusers/hooks/first_block_cache.py +259 -0
  28. diffusers/hooks/group_offloading.py +332 -227
  29. diffusers/hooks/hooks.py +58 -3
  30. diffusers/hooks/layer_skip.py +263 -0
  31. diffusers/hooks/layerwise_casting.py +5 -10
  32. diffusers/hooks/pyramid_attention_broadcast.py +15 -12
  33. diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
  34. diffusers/hooks/utils.py +43 -0
  35. diffusers/image_processor.py +7 -2
  36. diffusers/loaders/__init__.py +10 -0
  37. diffusers/loaders/ip_adapter.py +260 -18
  38. diffusers/loaders/lora_base.py +261 -127
  39. diffusers/loaders/lora_conversion_utils.py +657 -35
  40. diffusers/loaders/lora_pipeline.py +2778 -1246
  41. diffusers/loaders/peft.py +78 -112
  42. diffusers/loaders/single_file.py +2 -2
  43. diffusers/loaders/single_file_model.py +64 -15
  44. diffusers/loaders/single_file_utils.py +395 -7
  45. diffusers/loaders/textual_inversion.py +3 -2
  46. diffusers/loaders/transformer_flux.py +10 -11
  47. diffusers/loaders/transformer_sd3.py +8 -3
  48. diffusers/loaders/unet.py +24 -21
  49. diffusers/loaders/unet_loader_utils.py +6 -3
  50. diffusers/loaders/utils.py +1 -1
  51. diffusers/models/__init__.py +23 -1
  52. diffusers/models/activations.py +5 -5
  53. diffusers/models/adapter.py +2 -3
  54. diffusers/models/attention.py +488 -7
  55. diffusers/models/attention_dispatch.py +1218 -0
  56. diffusers/models/attention_flax.py +10 -10
  57. diffusers/models/attention_processor.py +113 -667
  58. diffusers/models/auto_model.py +49 -12
  59. diffusers/models/autoencoders/__init__.py +2 -0
  60. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  61. diffusers/models/autoencoders/autoencoder_dc.py +17 -4
  62. diffusers/models/autoencoders/autoencoder_kl.py +5 -5
  63. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  64. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  65. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
  66. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  67. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  68. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  69. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  70. diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
  71. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  72. diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
  73. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  74. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  75. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  76. diffusers/models/autoencoders/vae.py +13 -2
  77. diffusers/models/autoencoders/vq_model.py +2 -2
  78. diffusers/models/cache_utils.py +32 -10
  79. diffusers/models/controlnet.py +1 -1
  80. diffusers/models/controlnet_flux.py +1 -1
  81. diffusers/models/controlnet_sd3.py +1 -1
  82. diffusers/models/controlnet_sparsectrl.py +1 -1
  83. diffusers/models/controlnets/__init__.py +1 -0
  84. diffusers/models/controlnets/controlnet.py +3 -3
  85. diffusers/models/controlnets/controlnet_flax.py +1 -1
  86. diffusers/models/controlnets/controlnet_flux.py +21 -20
  87. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  88. diffusers/models/controlnets/controlnet_sana.py +290 -0
  89. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  90. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  91. diffusers/models/controlnets/controlnet_union.py +5 -5
  92. diffusers/models/controlnets/controlnet_xs.py +7 -7
  93. diffusers/models/controlnets/multicontrolnet.py +4 -5
  94. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  95. diffusers/models/downsampling.py +2 -2
  96. diffusers/models/embeddings.py +36 -46
  97. diffusers/models/embeddings_flax.py +2 -2
  98. diffusers/models/lora.py +3 -3
  99. diffusers/models/model_loading_utils.py +233 -1
  100. diffusers/models/modeling_flax_utils.py +1 -2
  101. diffusers/models/modeling_utils.py +203 -108
  102. diffusers/models/normalization.py +4 -4
  103. diffusers/models/resnet.py +2 -2
  104. diffusers/models/resnet_flax.py +1 -1
  105. diffusers/models/transformers/__init__.py +7 -0
  106. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  107. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  108. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  109. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  110. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  111. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  112. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  113. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  114. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  115. diffusers/models/transformers/prior_transformer.py +1 -1
  116. diffusers/models/transformers/sana_transformer.py +8 -3
  117. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  118. diffusers/models/transformers/t5_film_transformer.py +3 -3
  119. diffusers/models/transformers/transformer_2d.py +1 -1
  120. diffusers/models/transformers/transformer_allegro.py +1 -1
  121. diffusers/models/transformers/transformer_chroma.py +641 -0
  122. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  123. diffusers/models/transformers/transformer_cogview4.py +353 -27
  124. diffusers/models/transformers/transformer_cosmos.py +586 -0
  125. diffusers/models/transformers/transformer_flux.py +376 -138
  126. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  127. diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
  128. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  129. diffusers/models/transformers/transformer_ltx.py +105 -24
  130. diffusers/models/transformers/transformer_lumina2.py +1 -1
  131. diffusers/models/transformers/transformer_mochi.py +1 -1
  132. diffusers/models/transformers/transformer_omnigen.py +2 -2
  133. diffusers/models/transformers/transformer_qwenimage.py +645 -0
  134. diffusers/models/transformers/transformer_sd3.py +7 -7
  135. diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
  136. diffusers/models/transformers/transformer_temporal.py +1 -1
  137. diffusers/models/transformers/transformer_wan.py +316 -87
  138. diffusers/models/transformers/transformer_wan_vace.py +387 -0
  139. diffusers/models/unets/unet_1d.py +1 -1
  140. diffusers/models/unets/unet_1d_blocks.py +1 -1
  141. diffusers/models/unets/unet_2d.py +1 -1
  142. diffusers/models/unets/unet_2d_blocks.py +1 -1
  143. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  144. diffusers/models/unets/unet_2d_condition.py +4 -3
  145. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  146. diffusers/models/unets/unet_3d_blocks.py +1 -1
  147. diffusers/models/unets/unet_3d_condition.py +3 -3
  148. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  149. diffusers/models/unets/unet_kandinsky3.py +1 -1
  150. diffusers/models/unets/unet_motion_model.py +2 -2
  151. diffusers/models/unets/unet_stable_cascade.py +1 -1
  152. diffusers/models/upsampling.py +2 -2
  153. diffusers/models/vae_flax.py +2 -2
  154. diffusers/models/vq_model.py +1 -1
  155. diffusers/modular_pipelines/__init__.py +83 -0
  156. diffusers/modular_pipelines/components_manager.py +1068 -0
  157. diffusers/modular_pipelines/flux/__init__.py +66 -0
  158. diffusers/modular_pipelines/flux/before_denoise.py +689 -0
  159. diffusers/modular_pipelines/flux/decoders.py +109 -0
  160. diffusers/modular_pipelines/flux/denoise.py +227 -0
  161. diffusers/modular_pipelines/flux/encoders.py +412 -0
  162. diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
  163. diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
  164. diffusers/modular_pipelines/modular_pipeline.py +2446 -0
  165. diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
  166. diffusers/modular_pipelines/node_utils.py +665 -0
  167. diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
  168. diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
  169. diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
  170. diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
  171. diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
  172. diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
  173. diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
  174. diffusers/modular_pipelines/wan/__init__.py +66 -0
  175. diffusers/modular_pipelines/wan/before_denoise.py +365 -0
  176. diffusers/modular_pipelines/wan/decoders.py +105 -0
  177. diffusers/modular_pipelines/wan/denoise.py +261 -0
  178. diffusers/modular_pipelines/wan/encoders.py +242 -0
  179. diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
  180. diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
  181. diffusers/pipelines/__init__.py +68 -6
  182. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  183. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  184. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  185. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  186. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  187. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  188. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  189. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  190. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  191. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  192. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  193. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  194. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
  195. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  196. diffusers/pipelines/auto_pipeline.py +23 -20
  197. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  198. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  199. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  200. diffusers/pipelines/chroma/__init__.py +49 -0
  201. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  202. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  203. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  204. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
  205. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
  206. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
  207. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
  208. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  209. diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
  210. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  211. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  212. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  213. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  214. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  215. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
  216. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  217. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  218. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  219. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  220. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  221. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
  222. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
  223. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
  224. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  225. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  226. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  227. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  228. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  229. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  230. diffusers/pipelines/cosmos/__init__.py +54 -0
  231. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  232. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  233. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  234. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  235. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  236. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  237. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  238. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  239. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  240. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  241. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  242. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  243. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  244. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  245. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  246. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  247. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  248. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  249. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  250. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  251. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  252. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  253. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  254. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  255. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  256. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  257. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
  258. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  259. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  260. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  261. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  262. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  263. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  264. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  265. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  266. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  267. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  268. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  269. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  270. diffusers/pipelines/dit/pipeline_dit.py +4 -2
  271. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  272. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  273. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  274. diffusers/pipelines/flux/__init__.py +4 -0
  275. diffusers/pipelines/flux/modeling_flux.py +1 -1
  276. diffusers/pipelines/flux/pipeline_flux.py +37 -36
  277. diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
  278. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
  279. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
  280. diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
  281. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
  282. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
  283. diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
  284. diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
  285. diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
  286. diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
  287. diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
  288. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
  289. diffusers/pipelines/flux/pipeline_output.py +6 -4
  290. diffusers/pipelines/free_init_utils.py +2 -2
  291. diffusers/pipelines/free_noise_utils.py +3 -3
  292. diffusers/pipelines/hidream_image/__init__.py +47 -0
  293. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  294. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  295. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  296. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  297. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
  298. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  299. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  300. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  301. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  302. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  303. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  304. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  305. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  306. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  307. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  308. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  309. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  310. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  311. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  312. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  313. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  314. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  315. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  316. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  317. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  318. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  319. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  320. diffusers/pipelines/kolors/text_encoder.py +3 -3
  321. diffusers/pipelines/kolors/tokenizer.py +1 -1
  322. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  323. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  324. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  325. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  326. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  327. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  328. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  329. diffusers/pipelines/ltx/__init__.py +4 -0
  330. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  331. diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
  332. diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
  333. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
  334. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  335. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  336. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  337. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  338. diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
  339. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  340. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  341. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  342. diffusers/pipelines/onnx_utils.py +15 -2
  343. diffusers/pipelines/pag/pag_utils.py +2 -2
  344. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  345. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  346. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  347. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  348. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  349. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  350. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  351. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  352. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  353. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  354. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  355. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  356. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  357. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  358. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  359. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  360. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  361. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  362. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  363. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  364. diffusers/pipelines/pipeline_flax_utils.py +5 -6
  365. diffusers/pipelines/pipeline_loading_utils.py +113 -15
  366. diffusers/pipelines/pipeline_utils.py +127 -48
  367. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
  368. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
  369. diffusers/pipelines/qwenimage/__init__.py +55 -0
  370. diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
  371. diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
  372. diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
  373. diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
  374. diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
  375. diffusers/pipelines/sana/__init__.py +4 -0
  376. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  377. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  378. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  379. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  380. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  381. diffusers/pipelines/shap_e/camera.py +1 -1
  382. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  383. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  384. diffusers/pipelines/shap_e/renderer.py +3 -3
  385. diffusers/pipelines/skyreels_v2/__init__.py +59 -0
  386. diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
  387. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
  388. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
  389. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
  390. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
  391. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
  392. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  393. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  394. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  395. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  396. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  397. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  398. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  399. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  400. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  401. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  402. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  403. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
  404. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  405. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
  406. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
  407. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
  408. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  409. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  410. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  411. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  412. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  413. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  414. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  415. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  416. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  417. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  418. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  419. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  420. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
  421. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  422. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  423. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  424. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  425. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  426. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  427. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  428. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  429. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  430. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  431. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  432. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  433. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  434. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  435. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  436. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  437. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  438. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  439. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  440. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  441. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  442. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  443. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  444. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  445. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  446. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  447. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  448. diffusers/pipelines/unclip/text_proj.py +2 -2
  449. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  450. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  451. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  452. diffusers/pipelines/visualcloze/__init__.py +52 -0
  453. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  454. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  455. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  456. diffusers/pipelines/wan/__init__.py +2 -0
  457. diffusers/pipelines/wan/pipeline_wan.py +91 -30
  458. diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
  459. diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
  460. diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
  461. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  462. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  463. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  464. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  465. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  466. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  467. diffusers/quantizers/__init__.py +3 -1
  468. diffusers/quantizers/base.py +17 -1
  469. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  470. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  471. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  472. diffusers/quantizers/gguf/utils.py +108 -16
  473. diffusers/quantizers/pipe_quant_config.py +202 -0
  474. diffusers/quantizers/quantization_config.py +18 -16
  475. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  476. diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
  477. diffusers/schedulers/__init__.py +3 -1
  478. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  479. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  480. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  481. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  482. diffusers/schedulers/scheduling_ddim.py +8 -8
  483. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  484. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  485. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  486. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  487. diffusers/schedulers/scheduling_ddpm.py +9 -9
  488. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  489. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  490. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  491. diffusers/schedulers/scheduling_deis_multistep.py +16 -9
  492. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  493. diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
  494. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  495. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  496. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  497. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
  498. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  499. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  500. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  501. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  502. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  503. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  504. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  505. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  506. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  507. diffusers/schedulers/scheduling_ipndm.py +2 -2
  508. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  509. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  510. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  511. diffusers/schedulers/scheduling_lcm.py +3 -3
  512. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  513. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  514. diffusers/schedulers/scheduling_pndm.py +4 -4
  515. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  516. diffusers/schedulers/scheduling_repaint.py +9 -9
  517. diffusers/schedulers/scheduling_sasolver.py +15 -15
  518. diffusers/schedulers/scheduling_scm.py +1 -2
  519. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  520. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  521. diffusers/schedulers/scheduling_tcd.py +3 -3
  522. diffusers/schedulers/scheduling_unclip.py +5 -5
  523. diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
  524. diffusers/schedulers/scheduling_utils.py +3 -3
  525. diffusers/schedulers/scheduling_utils_flax.py +2 -2
  526. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  527. diffusers/training_utils.py +91 -5
  528. diffusers/utils/__init__.py +15 -0
  529. diffusers/utils/accelerate_utils.py +1 -1
  530. diffusers/utils/constants.py +4 -0
  531. diffusers/utils/doc_utils.py +1 -1
  532. diffusers/utils/dummy_pt_objects.py +432 -0
  533. diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
  534. diffusers/utils/dynamic_modules_utils.py +85 -8
  535. diffusers/utils/export_utils.py +1 -1
  536. diffusers/utils/hub_utils.py +33 -17
  537. diffusers/utils/import_utils.py +151 -18
  538. diffusers/utils/logging.py +1 -1
  539. diffusers/utils/outputs.py +2 -1
  540. diffusers/utils/peft_utils.py +96 -10
  541. diffusers/utils/state_dict_utils.py +20 -3
  542. diffusers/utils/testing_utils.py +195 -17
  543. diffusers/utils/torch_utils.py +43 -5
  544. diffusers/video_processor.py +2 -2
  545. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
  546. diffusers-0.35.0.dist-info/RECORD +703 -0
  547. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
  548. diffusers-0.33.1.dist-info/RECORD +0 -608
  549. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
  550. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
  551. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -140,7 +140,7 @@ class AnimateDiffSparseControlNetPipeline(
140
140
  ):
141
141
  r"""
142
142
  Pipeline for controlled text-to-video generation using the method described in [SparseCtrl: Adding Sparse Controls
143
- to Text-to-Video Diffusion Models](https://arxiv.org/abs/2311.16933).
143
+ to Text-to-Video Diffusion Models](https://huggingface.co/papers/2311.16933).
144
144
 
145
145
  This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
146
146
  implemented for all pipelines (downloading, saving, running on a particular device, etc.).
@@ -475,7 +475,7 @@ class AnimateDiffSparseControlNetPipeline(
475
475
  def prepare_extra_step_kwargs(self, generator, eta):
476
476
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
477
477
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
478
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
478
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
479
479
  # and should be between [0, 1]
480
480
 
481
481
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -695,7 +695,7 @@ class AnimateDiffSparseControlNetPipeline(
695
695
  return self._clip_skip
696
696
 
697
697
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
698
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
698
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
699
699
  # corresponds to doing no classifier free guidance.
700
700
  @property
701
701
  def do_classifier_free_guidance(self):
@@ -762,8 +762,8 @@ class AnimateDiffSparseControlNetPipeline(
762
762
  The prompt or prompts to guide what to not include in image generation. If not defined, you need to
763
763
  pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
764
764
  eta (`float`, *optional*, defaults to 0.0):
765
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
766
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
765
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
766
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
767
767
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
768
768
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
769
769
  generation deterministic.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -539,7 +539,7 @@ class AnimateDiffVideoToVideoPipeline(
539
539
  def prepare_extra_step_kwargs(self, generator, eta):
540
540
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
541
541
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
542
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
542
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
543
543
  # and should be between [0, 1]
544
544
 
545
545
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -725,7 +725,7 @@ class AnimateDiffVideoToVideoPipeline(
725
725
  return self._clip_skip
726
726
 
727
727
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
728
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
728
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
729
729
  # corresponds to doing no classifier free guidance.
730
730
  @property
731
731
  def do_classifier_free_guidance(self):
@@ -805,8 +805,8 @@ class AnimateDiffVideoToVideoPipeline(
805
805
  The prompt or prompts to guide what to not include in image generation. If not defined, you need to
806
806
  pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
807
807
  eta (`float`, *optional*, defaults to 0.0):
808
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
809
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
808
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
809
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
810
810
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
811
811
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
812
812
  generation deterministic.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -571,7 +571,7 @@ class AnimateDiffVideoToVideoControlNetPipeline(
571
571
  def prepare_extra_step_kwargs(self, generator, eta):
572
572
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
573
573
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
574
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
574
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
575
575
  # and should be between [0, 1]
576
576
 
577
577
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -890,7 +890,7 @@ class AnimateDiffVideoToVideoControlNetPipeline(
890
890
  return self._clip_skip
891
891
 
892
892
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
893
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
893
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
894
894
  # corresponds to doing no classifier free guidance.
895
895
  @property
896
896
  def do_classifier_free_guidance(self):
@@ -975,8 +975,8 @@ class AnimateDiffVideoToVideoControlNetPipeline(
975
975
  The prompt or prompts to guide what to not include in image generation. If not defined, you need to
976
976
  pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
977
977
  eta (`float`, *optional*, defaults to 0.0):
978
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
979
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
978
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
979
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
980
980
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
981
981
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
982
982
  generation deterministic.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel
24
24
  from ...schedulers import KarrasDiffusionSchedulers
25
25
  from ...utils import is_torch_xla_available, logging, replace_example_docstring
26
26
  from ...utils.torch_utils import randn_tensor
27
- from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin
27
+ from ..pipeline_utils import AudioPipelineOutput, DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
28
28
 
29
29
 
30
30
  if is_torch_xla_available():
@@ -57,7 +57,7 @@ EXAMPLE_DOC_STRING = """
57
57
  """
58
58
 
59
59
 
60
- class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
60
+ class AudioLDMPipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
61
61
  r"""
62
62
  Pipeline for text-to-audio generation using AudioLDM.
63
63
 
@@ -81,6 +81,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
81
81
  Vocoder of class `SpeechT5HifiGan`.
82
82
  """
83
83
 
84
+ _last_supported_version = "0.33.1"
84
85
  model_cpu_offload_seq = "text_encoder->unet->vae"
85
86
 
86
87
  def __init__(
@@ -261,7 +262,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
261
262
  def prepare_extra_step_kwargs(self, generator, eta):
262
263
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
263
264
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
264
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
265
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
265
266
  # and should be between [0, 1]
266
267
 
267
268
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -397,8 +398,8 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
397
398
  num_waveforms_per_prompt (`int`, *optional*, defaults to 1):
398
399
  The number of waveforms to generate per prompt.
399
400
  eta (`float`, *optional*, defaults to 0.0):
400
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
401
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
401
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
402
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
402
403
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
403
404
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
404
405
  generation deterministic.
@@ -472,7 +473,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
472
473
 
473
474
  device = self._execution_device
474
475
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
475
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
476
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
476
477
  # corresponds to doing no classifier free guidance.
477
478
  do_classifier_free_guidance = guidance_scale > 1.0
478
479
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 CVSSP, ByteDance and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 CVSSP, ByteDance and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -40,7 +40,8 @@ from ...utils import (
40
40
  logging,
41
41
  replace_example_docstring,
42
42
  )
43
- from ...utils.torch_utils import randn_tensor
43
+ from ...utils.import_utils import is_transformers_version
44
+ from ...utils.torch_utils import empty_device_cache, randn_tensor
44
45
  from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline
45
46
  from .modeling_audioldm2 import AudioLDM2ProjectionModel, AudioLDM2UNet2DConditionModel
46
47
 
@@ -266,9 +267,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
266
267
 
267
268
  if self.device.type != "cpu":
268
269
  self.to("cpu", silence_dtype_warnings=True)
269
- device_mod = getattr(torch, device.type, None)
270
- if hasattr(device_mod, "empty_cache") and device_mod.is_available():
271
- device_mod.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
270
+ empty_device_cache(device.type)
272
271
 
273
272
  model_sequence = [
274
273
  self.text_encoder.text_model,
@@ -312,8 +311,18 @@ class AudioLDM2Pipeline(DiffusionPipeline):
312
311
  `inputs_embeds (`torch.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
313
312
  The sequence of generated hidden-states.
314
313
  """
314
+ cache_position_kwargs = {}
315
+ if is_transformers_version("<", "4.52.1"):
316
+ cache_position_kwargs["input_ids"] = inputs_embeds
317
+ else:
318
+ cache_position_kwargs["seq_length"] = inputs_embeds.shape[0]
319
+ cache_position_kwargs["device"] = (
320
+ self.language_model.device if getattr(self, "language_model", None) is not None else self.device
321
+ )
322
+ cache_position_kwargs["model_kwargs"] = model_kwargs
315
323
  max_new_tokens = max_new_tokens if max_new_tokens is not None else self.language_model.config.max_new_tokens
316
- model_kwargs = self.language_model._get_initial_cache_position(inputs_embeds, model_kwargs)
324
+ model_kwargs = self.language_model._get_initial_cache_position(**cache_position_kwargs)
325
+
317
326
  for _ in range(max_new_tokens):
318
327
  # prepare model inputs
319
328
  model_inputs = prepare_inputs_for_generation(inputs_embeds, **model_kwargs)
@@ -373,7 +382,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
373
382
  *e.g.* prompt weighting. If not provided, negative_prompt_embeds will be computed from
374
383
  `negative_prompt` input argument.
375
384
  generated_prompt_embeds (`torch.Tensor`, *optional*):
376
- Pre-generated text embeddings from the GPT2 langauge model. Can be used to easily tweak text inputs,
385
+ Pre-generated text embeddings from the GPT2 language model. Can be used to easily tweak text inputs,
377
386
  *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input
378
387
  argument.
379
388
  negative_generated_prompt_embeds (`torch.Tensor`, *optional*):
@@ -394,7 +403,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
394
403
  attention_mask (`torch.LongTensor`):
395
404
  Attention mask to be applied to the `prompt_embeds`.
396
405
  generated_prompt_embeds (`torch.Tensor`):
397
- Text embeddings generated from the GPT2 langauge model.
406
+ Text embeddings generated from the GPT2 language model.
398
407
 
399
408
  Example:
400
409
 
@@ -701,7 +710,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
701
710
  def prepare_extra_step_kwargs(self, generator, eta):
702
711
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
703
712
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
704
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
713
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
705
714
  # and should be between [0, 1]
706
715
 
707
716
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -888,8 +897,8 @@ class AudioLDM2Pipeline(DiffusionPipeline):
888
897
  generated waveforms based on their cosine similarity with the text input in the joint text-audio
889
898
  embedding space.
890
899
  eta (`float`, *optional*, defaults to 0.0):
891
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
892
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
900
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
901
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
893
902
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
894
903
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
895
904
  generation deterministic.
@@ -904,7 +913,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
904
913
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
905
914
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
906
915
  generated_prompt_embeds (`torch.Tensor`, *optional*):
907
- Pre-generated text embeddings from the GPT2 langauge model. Can be used to easily tweak text inputs,
916
+ Pre-generated text embeddings from the GPT2 language model. Can be used to easily tweak text inputs,
908
917
  *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input
909
918
  argument.
910
919
  negative_generated_prompt_embeds (`torch.Tensor`, *optional*):
@@ -987,7 +996,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
987
996
 
988
997
  device = self._execution_device
989
998
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
990
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
999
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
991
1000
  # corresponds to doing no classifier free guidance.
992
1001
  do_classifier_free_guidance = guidance_scale > 1.0
993
1002
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 AuraFlow Authors and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 AuraFlow Authors and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -12,17 +12,25 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  import inspect
15
- from typing import Callable, Dict, List, Optional, Tuple, Union
15
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
16
16
 
17
17
  import torch
18
18
  from transformers import T5Tokenizer, UMT5EncoderModel
19
19
 
20
20
  from ...callbacks import MultiPipelineCallbacks, PipelineCallback
21
21
  from ...image_processor import VaeImageProcessor
22
+ from ...loaders import AuraFlowLoraLoaderMixin
22
23
  from ...models import AuraFlowTransformer2DModel, AutoencoderKL
23
24
  from ...models.attention_processor import AttnProcessor2_0, FusedAttnProcessor2_0, XFormersAttnProcessor
24
25
  from ...schedulers import FlowMatchEulerDiscreteScheduler
25
- from ...utils import is_torch_xla_available, logging, replace_example_docstring
26
+ from ...utils import (
27
+ USE_PEFT_BACKEND,
28
+ is_torch_xla_available,
29
+ logging,
30
+ replace_example_docstring,
31
+ scale_lora_layers,
32
+ unscale_lora_layers,
33
+ )
26
34
  from ...utils.torch_utils import randn_tensor
27
35
  from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
28
36
 
@@ -112,7 +120,7 @@ def retrieve_timesteps(
112
120
  return timesteps, num_inference_steps
113
121
 
114
122
 
115
- class AuraFlowPipeline(DiffusionPipeline):
123
+ class AuraFlowPipeline(DiffusionPipeline, AuraFlowLoraLoaderMixin):
116
124
  r"""
117
125
  Args:
118
126
  tokenizer (`T5TokenizerFast`):
@@ -233,6 +241,7 @@ class AuraFlowPipeline(DiffusionPipeline):
233
241
  prompt_attention_mask: Optional[torch.Tensor] = None,
234
242
  negative_prompt_attention_mask: Optional[torch.Tensor] = None,
235
243
  max_sequence_length: int = 256,
244
+ lora_scale: Optional[float] = None,
236
245
  ):
237
246
  r"""
238
247
  Encodes the prompt into text encoder hidden states.
@@ -259,10 +268,20 @@ class AuraFlowPipeline(DiffusionPipeline):
259
268
  negative_prompt_attention_mask (`torch.Tensor`, *optional*):
260
269
  Pre-generated attention mask for negative text embeddings.
261
270
  max_sequence_length (`int`, defaults to 256): Maximum sequence length to use for the prompt.
271
+ lora_scale (`float`, *optional*):
272
+ A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
262
273
  """
274
+ # set lora scale so that monkey patched LoRA
275
+ # function of text encoder can correctly access it
276
+ if lora_scale is not None and isinstance(self, AuraFlowLoraLoaderMixin):
277
+ self._lora_scale = lora_scale
278
+
279
+ # dynamically adjust the LoRA scale
280
+ if self.text_encoder is not None and USE_PEFT_BACKEND:
281
+ scale_lora_layers(self.text_encoder, lora_scale)
282
+
263
283
  if device is None:
264
284
  device = self._execution_device
265
-
266
285
  if prompt is not None and isinstance(prompt, str):
267
286
  batch_size = 1
268
287
  elif prompt is not None and isinstance(prompt, list):
@@ -346,6 +365,11 @@ class AuraFlowPipeline(DiffusionPipeline):
346
365
  negative_prompt_embeds = None
347
366
  negative_prompt_attention_mask = None
348
367
 
368
+ if self.text_encoder is not None:
369
+ if isinstance(self, AuraFlowLoraLoaderMixin) and USE_PEFT_BACKEND:
370
+ # Retrieve the original scale by scaling back the LoRA layers
371
+ unscale_lora_layers(self.text_encoder, lora_scale)
372
+
349
373
  return prompt_embeds, prompt_attention_mask, negative_prompt_embeds, negative_prompt_attention_mask
350
374
 
351
375
  # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3.StableDiffusion3Pipeline.prepare_latents
@@ -403,6 +427,10 @@ class AuraFlowPipeline(DiffusionPipeline):
403
427
  def guidance_scale(self):
404
428
  return self._guidance_scale
405
429
 
430
+ @property
431
+ def attention_kwargs(self):
432
+ return self._attention_kwargs
433
+
406
434
  @property
407
435
  def num_timesteps(self):
408
436
  return self._num_timesteps
@@ -428,6 +456,7 @@ class AuraFlowPipeline(DiffusionPipeline):
428
456
  max_sequence_length: int = 256,
429
457
  output_type: Optional[str] = "pil",
430
458
  return_dict: bool = True,
459
+ attention_kwargs: Optional[Dict[str, Any]] = None,
431
460
  callback_on_step_end: Optional[
432
461
  Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
433
462
  ] = None,
@@ -455,11 +484,11 @@ class AuraFlowPipeline(DiffusionPipeline):
455
484
  Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
456
485
  `num_inference_steps` and `timesteps` must be `None`.
457
486
  guidance_scale (`float`, *optional*, defaults to 5.0):
458
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
459
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
460
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
461
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
462
- usually at the expense of lower image quality.
487
+ Guidance scale as defined in [Classifier-Free Diffusion
488
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
489
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
490
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
491
+ the text `prompt`, usually at the expense of lower image quality.
463
492
  num_images_per_prompt (`int`, *optional*, defaults to 1):
464
493
  The number of images to generate per prompt.
465
494
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -486,6 +515,10 @@ class AuraFlowPipeline(DiffusionPipeline):
486
515
  return_dict (`bool`, *optional*, defaults to `True`):
487
516
  Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
488
517
  of a plain tuple.
518
+ attention_kwargs (`dict`, *optional*):
519
+ A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
520
+ `self.processor` in
521
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
489
522
  callback_on_step_end (`Callable`, *optional*):
490
523
  A function that calls at the end of each denoising steps during the inference. The function is called
491
524
  with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
@@ -520,6 +553,7 @@ class AuraFlowPipeline(DiffusionPipeline):
520
553
  )
521
554
 
522
555
  self._guidance_scale = guidance_scale
556
+ self._attention_kwargs = attention_kwargs
523
557
 
524
558
  # 2. Determine batch size.
525
559
  if prompt is not None and isinstance(prompt, str):
@@ -530,9 +564,10 @@ class AuraFlowPipeline(DiffusionPipeline):
530
564
  batch_size = prompt_embeds.shape[0]
531
565
 
532
566
  device = self._execution_device
567
+ lora_scale = self.attention_kwargs.get("scale", None) if self.attention_kwargs is not None else None
533
568
 
534
569
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
535
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
570
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
536
571
  # corresponds to doing no classifier free guidance.
537
572
  do_classifier_free_guidance = guidance_scale > 1.0
538
573
 
@@ -553,6 +588,7 @@ class AuraFlowPipeline(DiffusionPipeline):
553
588
  prompt_attention_mask=prompt_attention_mask,
554
589
  negative_prompt_attention_mask=negative_prompt_attention_mask,
555
590
  max_sequence_length=max_sequence_length,
591
+ lora_scale=lora_scale,
556
592
  )
557
593
  if do_classifier_free_guidance:
558
594
  prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
@@ -594,6 +630,7 @@ class AuraFlowPipeline(DiffusionPipeline):
594
630
  encoder_hidden_states=prompt_embeds,
595
631
  timestep=timestep,
596
632
  return_dict=False,
633
+ attention_kwargs=self.attention_kwargs,
597
634
  )[0]
598
635
 
599
636
  # perform guidance
@@ -21,6 +21,7 @@ from ..configuration_utils import ConfigMixin
21
21
  from ..models.controlnets import ControlNetUnionModel
22
22
  from ..utils import is_sentencepiece_available
23
23
  from .aura_flow import AuraFlowPipeline
24
+ from .chroma import ChromaPipeline
24
25
  from .cogview3 import CogView3PlusPipeline
25
26
  from .cogview4 import CogView4ControlPipeline, CogView4Pipeline
26
27
  from .controlnet import (
@@ -48,6 +49,7 @@ from .flux import (
48
49
  FluxControlPipeline,
49
50
  FluxImg2ImgPipeline,
50
51
  FluxInpaintPipeline,
52
+ FluxKontextPipeline,
51
53
  FluxPipeline,
52
54
  )
53
55
  from .hunyuandit import HunyuanDiTPipeline
@@ -141,8 +143,10 @@ AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
141
143
  ("flux", FluxPipeline),
142
144
  ("flux-control", FluxControlPipeline),
143
145
  ("flux-controlnet", FluxControlNetPipeline),
146
+ ("flux-kontext", FluxKontextPipeline),
144
147
  ("lumina", LuminaPipeline),
145
148
  ("lumina2", Lumina2Pipeline),
149
+ ("chroma", ChromaPipeline),
146
150
  ("cogview3", CogView3PlusPipeline),
147
151
  ("cogview4", CogView4Pipeline),
148
152
  ("cogview4-control", CogView4ControlPipeline),
@@ -169,6 +173,7 @@ AUTO_IMAGE2IMAGE_PIPELINES_MAPPING = OrderedDict(
169
173
  ("flux", FluxImg2ImgPipeline),
170
174
  ("flux-controlnet", FluxControlNetImg2ImgPipeline),
171
175
  ("flux-control", FluxControlImg2ImgPipeline),
176
+ ("flux-kontext", FluxKontextPipeline),
172
177
  ]
173
178
  )
174
179
 
@@ -246,14 +251,15 @@ def _get_connected_pipeline(pipeline_cls):
246
251
  return _get_task_class(AUTO_INPAINT_PIPELINES_MAPPING, pipeline_cls.__name__, throw_error_if_not_exist=False)
247
252
 
248
253
 
249
- def _get_task_class(mapping, pipeline_class_name, throw_error_if_not_exist: bool = True):
250
- def get_model(pipeline_class_name):
251
- for task_mapping in SUPPORTED_TASKS_MAPPINGS:
252
- for model_name, pipeline in task_mapping.items():
253
- if pipeline.__name__ == pipeline_class_name:
254
- return model_name
254
+ def _get_model(pipeline_class_name):
255
+ for task_mapping in SUPPORTED_TASKS_MAPPINGS:
256
+ for model_name, pipeline in task_mapping.items():
257
+ if pipeline.__name__ == pipeline_class_name:
258
+ return model_name
259
+
255
260
 
256
- model_name = get_model(pipeline_class_name)
261
+ def _get_task_class(mapping, pipeline_class_name, throw_error_if_not_exist: bool = True):
262
+ model_name = _get_model(pipeline_class_name)
257
263
 
258
264
  if model_name is not None:
259
265
  task_class = mapping.get(model_name, None)
@@ -322,9 +328,8 @@ class AutoPipelineForText2Image(ConfigMixin):
322
328
  - A path to a *directory* (for example `./my_pipeline_directory/`) containing pipeline weights
323
329
  saved using
324
330
  [`~DiffusionPipeline.save_pretrained`].
325
- torch_dtype (`str` or `torch.dtype`, *optional*):
326
- Override the default `torch.dtype` and load the model with another dtype. If "auto" is passed, the
327
- dtype is automatically derived from the model's weights.
331
+ torch_dtype (`torch.dtype`, *optional*):
332
+ Override the default `torch.dtype` and load the model with another dtype.
328
333
  force_download (`bool`, *optional*, defaults to `False`):
329
334
  Whether or not to force the (re-)download of the model weights and configuration files, overriding the
330
335
  cached versions if they exist.
@@ -390,8 +395,8 @@ class AutoPipelineForText2Image(ConfigMixin):
390
395
 
391
396
  <Tip>
392
397
 
393
- To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
394
- `huggingface-cli login`.
398
+ To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
399
+ auth login`.
395
400
 
396
401
  </Tip>
397
402
 
@@ -619,8 +624,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
619
624
  saved using
620
625
  [`~DiffusionPipeline.save_pretrained`].
621
626
  torch_dtype (`str` or `torch.dtype`, *optional*):
622
- Override the default `torch.dtype` and load the model with another dtype. If "auto" is passed, the
623
- dtype is automatically derived from the model's weights.
627
+ Override the default `torch.dtype` and load the model with another dtype.
624
628
  force_download (`bool`, *optional*, defaults to `False`):
625
629
  Whether or not to force the (re-)download of the model weights and configuration files, overriding the
626
630
  cached versions if they exist.
@@ -686,8 +690,8 @@ class AutoPipelineForImage2Image(ConfigMixin):
686
690
 
687
691
  <Tip>
688
692
 
689
- To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
690
- `huggingface-cli login`.
693
+ To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
694
+ auth login`.
691
695
 
692
696
  </Tip>
693
697
 
@@ -930,8 +934,7 @@ class AutoPipelineForInpainting(ConfigMixin):
930
934
  saved using
931
935
  [`~DiffusionPipeline.save_pretrained`].
932
936
  torch_dtype (`str` or `torch.dtype`, *optional*):
933
- Override the default `torch.dtype` and load the model with another dtype. If "auto" is passed, the
934
- dtype is automatically derived from the model's weights.
937
+ Override the default `torch.dtype` and load the model with another dtype.
935
938
  force_download (`bool`, *optional*, defaults to `False`):
936
939
  Whether or not to force the (re-)download of the model weights and configuration files, overriding the
937
940
  cached versions if they exist.
@@ -997,8 +1000,8 @@ class AutoPipelineForInpainting(ConfigMixin):
997
1000
 
998
1001
  <Tip>
999
1002
 
1000
- To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
1001
- `huggingface-cli login`.
1003
+ To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
1004
+ auth login`.
1002
1005
 
1003
1006
  </Tip>
1004
1007
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,5 +1,5 @@
1
- # Copyright 2024 Salesforce.com, inc.
2
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Salesforce.com, inc.
2
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
5
  # you may not use this file except in compliance with the License.
@@ -1,5 +1,5 @@
1
- # Copyright 2024 Salesforce.com, inc.
2
- # Copyright 2024 The HuggingFace Team. All rights reserved.#
1
+ # Copyright 2025 Salesforce.com, inc.
2
+ # Copyright 2025 The HuggingFace Team. All rights reserved.#
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
5
5
  # You may obtain a copy of the License at
@@ -25,7 +25,7 @@ from ...utils import (
25
25
  replace_example_docstring,
26
26
  )
27
27
  from ...utils.torch_utils import randn_tensor
28
- from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
28
+ from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
29
29
  from .blip_image_processing import BlipImageProcessor
30
30
  from .modeling_blip2 import Blip2QFormerModel
31
31
  from .modeling_ctx_clip import ContextCLIPTextModel
@@ -81,7 +81,7 @@ EXAMPLE_DOC_STRING = """
81
81
  """
82
82
 
83
83
 
84
- class BlipDiffusionPipeline(DiffusionPipeline):
84
+ class BlipDiffusionPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
85
85
  """
86
86
  Pipeline for Zero-Shot Subject Driven Generation using Blip Diffusion.
87
87
 
@@ -107,6 +107,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
107
107
  Position of the context token in the text encoder.
108
108
  """
109
109
 
110
+ _last_supported_version = "0.33.1"
110
111
  model_cpu_offload_seq = "qformer->text_encoder->unet->vae"
111
112
 
112
113
  def __init__(
@@ -138,7 +139,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
138
139
  def get_query_embeddings(self, input_image, src_subject):
139
140
  return self.qformer(image_input=input_image, text_input=src_subject, return_dict=False)
140
141
 
141
- # from the original Blip Diffusion code, speciefies the target subject and augments the prompt by repeating it
142
+ # from the original Blip Diffusion code, specifies the target subject and augments the prompt by repeating it
142
143
  def _build_prompt(self, prompts, tgt_subjects, prompt_strength=1.0, prompt_reps=20):
143
144
  rv = []
144
145
  for prompt, tgt_subject in zip(prompts, tgt_subjects):
@@ -229,11 +230,11 @@ class BlipDiffusionPipeline(DiffusionPipeline):
229
230
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
230
231
  tensor will be generated by random sampling.
231
232
  guidance_scale (`float`, *optional*, defaults to 7.5):
232
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
233
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
234
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
235
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
236
- usually at the expense of lower image quality.
233
+ Guidance scale as defined in [Classifier-Free Diffusion
234
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
235
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
236
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
237
+ the text `prompt`, usually at the expense of lower image quality.
237
238
  height (`int`, *optional*, defaults to 512):
238
239
  The height of the generated image.
239
240
  width (`int`, *optional*, defaults to 512):