diffusers 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (478)
  1. diffusers/__init__.py +48 -1
  2. diffusers/commands/__init__.py +1 -1
  3. diffusers/commands/diffusers_cli.py +1 -1
  4. diffusers/commands/env.py +1 -1
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/dependency_versions_check.py +1 -1
  7. diffusers/dependency_versions_table.py +1 -1
  8. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  9. diffusers/hooks/faster_cache.py +2 -2
  10. diffusers/hooks/group_offloading.py +128 -29
  11. diffusers/hooks/hooks.py +2 -2
  12. diffusers/hooks/layerwise_casting.py +3 -3
  13. diffusers/hooks/pyramid_attention_broadcast.py +1 -1
  14. diffusers/image_processor.py +7 -2
  15. diffusers/loaders/__init__.py +4 -0
  16. diffusers/loaders/ip_adapter.py +5 -14
  17. diffusers/loaders/lora_base.py +212 -111
  18. diffusers/loaders/lora_conversion_utils.py +275 -34
  19. diffusers/loaders/lora_pipeline.py +1554 -819
  20. diffusers/loaders/peft.py +52 -109
  21. diffusers/loaders/single_file.py +2 -2
  22. diffusers/loaders/single_file_model.py +20 -4
  23. diffusers/loaders/single_file_utils.py +225 -5
  24. diffusers/loaders/textual_inversion.py +3 -2
  25. diffusers/loaders/transformer_flux.py +1 -1
  26. diffusers/loaders/transformer_sd3.py +2 -2
  27. diffusers/loaders/unet.py +2 -16
  28. diffusers/loaders/unet_loader_utils.py +1 -1
  29. diffusers/loaders/utils.py +1 -1
  30. diffusers/models/__init__.py +15 -1
  31. diffusers/models/activations.py +5 -5
  32. diffusers/models/adapter.py +2 -3
  33. diffusers/models/attention.py +4 -4
  34. diffusers/models/attention_flax.py +10 -10
  35. diffusers/models/attention_processor.py +14 -10
  36. diffusers/models/auto_model.py +47 -10
  37. diffusers/models/autoencoders/__init__.py +1 -0
  38. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  39. diffusers/models/autoencoders/autoencoder_dc.py +3 -3
  40. diffusers/models/autoencoders/autoencoder_kl.py +4 -4
  41. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  42. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  43. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
  44. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  45. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  46. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  47. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  48. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  49. diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
  50. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  52. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  53. diffusers/models/autoencoders/vae.py +13 -2
  54. diffusers/models/autoencoders/vq_model.py +2 -2
  55. diffusers/models/cache_utils.py +1 -1
  56. diffusers/models/controlnet.py +1 -1
  57. diffusers/models/controlnet_flux.py +1 -1
  58. diffusers/models/controlnet_sd3.py +1 -1
  59. diffusers/models/controlnet_sparsectrl.py +1 -1
  60. diffusers/models/controlnets/__init__.py +1 -0
  61. diffusers/models/controlnets/controlnet.py +3 -3
  62. diffusers/models/controlnets/controlnet_flax.py +1 -1
  63. diffusers/models/controlnets/controlnet_flux.py +16 -15
  64. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  65. diffusers/models/controlnets/controlnet_sana.py +290 -0
  66. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  67. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  68. diffusers/models/controlnets/controlnet_union.py +1 -1
  69. diffusers/models/controlnets/controlnet_xs.py +7 -7
  70. diffusers/models/controlnets/multicontrolnet.py +4 -5
  71. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  72. diffusers/models/downsampling.py +2 -2
  73. diffusers/models/embeddings.py +10 -12
  74. diffusers/models/embeddings_flax.py +2 -2
  75. diffusers/models/lora.py +3 -3
  76. diffusers/models/modeling_utils.py +44 -14
  77. diffusers/models/normalization.py +4 -4
  78. diffusers/models/resnet.py +2 -2
  79. diffusers/models/resnet_flax.py +1 -1
  80. diffusers/models/transformers/__init__.py +5 -0
  81. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  82. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  83. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  84. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  85. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  86. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  87. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  88. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  89. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  90. diffusers/models/transformers/prior_transformer.py +1 -1
  91. diffusers/models/transformers/sana_transformer.py +8 -3
  92. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  93. diffusers/models/transformers/t5_film_transformer.py +3 -3
  94. diffusers/models/transformers/transformer_2d.py +1 -1
  95. diffusers/models/transformers/transformer_allegro.py +1 -1
  96. diffusers/models/transformers/transformer_chroma.py +742 -0
  97. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  98. diffusers/models/transformers/transformer_cogview4.py +317 -25
  99. diffusers/models/transformers/transformer_cosmos.py +579 -0
  100. diffusers/models/transformers/transformer_flux.py +9 -11
  101. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  102. diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
  103. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  104. diffusers/models/transformers/transformer_ltx.py +2 -2
  105. diffusers/models/transformers/transformer_lumina2.py +1 -1
  106. diffusers/models/transformers/transformer_mochi.py +1 -1
  107. diffusers/models/transformers/transformer_omnigen.py +2 -2
  108. diffusers/models/transformers/transformer_sd3.py +7 -7
  109. diffusers/models/transformers/transformer_temporal.py +1 -1
  110. diffusers/models/transformers/transformer_wan.py +24 -8
  111. diffusers/models/transformers/transformer_wan_vace.py +393 -0
  112. diffusers/models/unets/unet_1d.py +1 -1
  113. diffusers/models/unets/unet_1d_blocks.py +1 -1
  114. diffusers/models/unets/unet_2d.py +1 -1
  115. diffusers/models/unets/unet_2d_blocks.py +1 -1
  116. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  117. diffusers/models/unets/unet_2d_condition.py +2 -2
  118. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  119. diffusers/models/unets/unet_3d_blocks.py +1 -1
  120. diffusers/models/unets/unet_3d_condition.py +3 -3
  121. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  122. diffusers/models/unets/unet_kandinsky3.py +1 -1
  123. diffusers/models/unets/unet_motion_model.py +2 -2
  124. diffusers/models/unets/unet_stable_cascade.py +1 -1
  125. diffusers/models/upsampling.py +2 -2
  126. diffusers/models/vae_flax.py +2 -2
  127. diffusers/models/vq_model.py +1 -1
  128. diffusers/pipelines/__init__.py +37 -6
  129. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  130. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  131. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  132. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  133. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  134. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  135. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  136. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  137. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  138. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  139. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  140. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  141. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
  142. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  143. diffusers/pipelines/auto_pipeline.py +6 -7
  144. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  145. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  146. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  147. diffusers/pipelines/chroma/__init__.py +49 -0
  148. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  149. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  150. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  151. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
  152. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
  153. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
  154. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
  155. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  156. diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
  157. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  158. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  159. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  160. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  161. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  162. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
  163. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  164. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  165. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  166. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  167. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  168. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
  169. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
  170. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
  171. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  172. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  173. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  174. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  175. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  176. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  177. diffusers/pipelines/cosmos/__init__.py +54 -0
  178. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  179. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  180. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  181. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  182. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  183. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  184. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  185. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  186. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  187. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  188. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  189. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  190. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  191. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  192. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  193. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  194. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  195. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  196. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  197. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  198. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  199. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  200. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  201. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  202. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  203. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  204. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
  205. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  206. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  207. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  208. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  209. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  210. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  211. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  212. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  213. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  214. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  215. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  216. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  217. diffusers/pipelines/dit/pipeline_dit.py +1 -1
  218. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  219. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  220. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  221. diffusers/pipelines/flux/modeling_flux.py +1 -1
  222. diffusers/pipelines/flux/pipeline_flux.py +10 -17
  223. diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
  224. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
  225. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
  226. diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
  227. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
  228. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
  229. diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
  230. diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
  231. diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
  232. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  233. diffusers/pipelines/free_init_utils.py +2 -2
  234. diffusers/pipelines/free_noise_utils.py +3 -3
  235. diffusers/pipelines/hidream_image/__init__.py +47 -0
  236. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  237. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  238. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  239. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  240. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
  241. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  242. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  243. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  244. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  245. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  246. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  247. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  248. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  249. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  250. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  251. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  252. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  253. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  254. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  255. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  256. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  257. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  258. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  259. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  260. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  261. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  262. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  263. diffusers/pipelines/kolors/text_encoder.py +3 -3
  264. diffusers/pipelines/kolors/tokenizer.py +1 -1
  265. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  266. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  267. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  268. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  269. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  270. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  271. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  272. diffusers/pipelines/ltx/__init__.py +4 -0
  273. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  274. diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
  275. diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
  276. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
  277. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  278. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  279. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  280. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  281. diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
  282. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  283. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  284. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  285. diffusers/pipelines/onnx_utils.py +15 -2
  286. diffusers/pipelines/pag/pag_utils.py +2 -2
  287. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  288. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  289. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  290. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  291. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  292. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  293. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  294. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  295. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  296. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  297. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  298. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  299. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  300. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  301. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  302. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  303. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  304. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  305. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  306. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  307. diffusers/pipelines/pipeline_flax_utils.py +3 -4
  308. diffusers/pipelines/pipeline_loading_utils.py +89 -13
  309. diffusers/pipelines/pipeline_utils.py +105 -33
  310. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
  311. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
  312. diffusers/pipelines/sana/__init__.py +4 -0
  313. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  314. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  315. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  316. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  317. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  318. diffusers/pipelines/shap_e/camera.py +1 -1
  319. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  320. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  321. diffusers/pipelines/shap_e/renderer.py +3 -3
  322. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  323. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  324. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  325. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  326. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  327. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  328. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  329. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  330. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  331. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  332. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  333. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
  334. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  335. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
  336. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
  337. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
  338. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  339. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  340. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  341. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  342. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  343. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  344. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  345. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  346. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  347. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  348. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  349. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  350. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
  351. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  352. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  353. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  354. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  355. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  356. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  357. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  358. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  359. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  360. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  361. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  362. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  363. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  364. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  365. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  366. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  367. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  368. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  369. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  370. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  371. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  372. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  373. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  374. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  375. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  376. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  377. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  378. diffusers/pipelines/unclip/text_proj.py +2 -2
  379. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  380. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  381. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  382. diffusers/pipelines/visualcloze/__init__.py +52 -0
  383. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  384. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  385. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  386. diffusers/pipelines/wan/__init__.py +2 -0
  387. diffusers/pipelines/wan/pipeline_wan.py +17 -12
  388. diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
  389. diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
  390. diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
  391. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  392. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  393. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  394. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  395. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  396. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  397. diffusers/quantizers/__init__.py +179 -1
  398. diffusers/quantizers/base.py +6 -1
  399. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  400. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  401. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  402. diffusers/quantizers/gguf/utils.py +16 -13
  403. diffusers/quantizers/quantization_config.py +18 -16
  404. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  405. diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
  406. diffusers/schedulers/__init__.py +3 -1
  407. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  408. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  409. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  410. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  411. diffusers/schedulers/scheduling_ddim.py +8 -8
  412. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  413. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  414. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  415. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  416. diffusers/schedulers/scheduling_ddpm.py +9 -9
  417. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  418. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  419. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  420. diffusers/schedulers/scheduling_deis_multistep.py +8 -8
  421. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  422. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
  423. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  424. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  425. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  426. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
  427. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  428. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  429. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  430. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  431. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  432. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  433. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  434. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  435. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  436. diffusers/schedulers/scheduling_ipndm.py +2 -2
  437. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  438. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  439. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  440. diffusers/schedulers/scheduling_lcm.py +3 -3
  441. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  442. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  443. diffusers/schedulers/scheduling_pndm.py +4 -4
  444. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  445. diffusers/schedulers/scheduling_repaint.py +9 -9
  446. diffusers/schedulers/scheduling_sasolver.py +15 -15
  447. diffusers/schedulers/scheduling_scm.py +1 -1
  448. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  449. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  450. diffusers/schedulers/scheduling_tcd.py +3 -3
  451. diffusers/schedulers/scheduling_unclip.py +5 -5
  452. diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
  453. diffusers/schedulers/scheduling_utils.py +1 -1
  454. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  455. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  456. diffusers/training_utils.py +13 -5
  457. diffusers/utils/__init__.py +5 -0
  458. diffusers/utils/accelerate_utils.py +1 -1
  459. diffusers/utils/doc_utils.py +1 -1
  460. diffusers/utils/dummy_pt_objects.py +120 -0
  461. diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
  462. diffusers/utils/dynamic_modules_utils.py +21 -3
  463. diffusers/utils/export_utils.py +1 -1
  464. diffusers/utils/import_utils.py +81 -18
  465. diffusers/utils/logging.py +1 -1
  466. diffusers/utils/outputs.py +2 -1
  467. diffusers/utils/peft_utils.py +91 -8
  468. diffusers/utils/state_dict_utils.py +20 -3
  469. diffusers/utils/testing_utils.py +59 -7
  470. diffusers/utils/torch_utils.py +25 -5
  471. diffusers/video_processor.py +2 -2
  472. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
  473. diffusers-0.34.0.dist-info/RECORD +639 -0
  474. diffusers-0.33.0.dist-info/RECORD +0 -608
  475. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
  476. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
  477. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
  478. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2024 PixArt-Sigma Authors and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 PixArt-Sigma Authors and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -35,7 +35,7 @@ from ...utils import (
35
35
  logging,
36
36
  replace_example_docstring,
37
37
  )
38
- from ...utils.torch_utils import randn_tensor
38
+ from ...utils.torch_utils import get_device, is_torch_version, randn_tensor
39
39
  from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
40
40
  from ..pixart_alpha.pipeline_pixart_alpha import (
41
41
  ASPECT_RATIO_512_BIN,
@@ -363,7 +363,7 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
363
363
  def prepare_extra_step_kwargs(self, generator, eta):
364
364
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
365
365
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
366
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
366
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
367
367
  # and should be between [0, 1]
368
368
 
369
369
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -524,7 +524,7 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
524
524
  # &amp
525
525
  caption = re.sub(r"&amp", "", caption)
526
526
 
527
- # ip adresses:
527
+ # ip addresses:
528
528
  caption = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", " ", caption)
529
529
 
530
530
  # article ids:
@@ -683,11 +683,11 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
683
683
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
684
684
  will be used.
685
685
  guidance_scale (`float`, *optional*, defaults to 4.5):
686
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
687
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
688
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
689
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
690
- usually at the expense of lower image quality.
686
+ Guidance scale as defined in [Classifier-Free Diffusion
687
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
688
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
689
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
690
+ the text `prompt`, usually at the expense of lower image quality.
691
691
  num_images_per_prompt (`int`, *optional*, defaults to 1):
692
692
  The number of images to generate per prompt.
693
693
  height (`int`, *optional*, defaults to self.unet.config.sample_size):
@@ -695,8 +695,8 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
695
695
  width (`int`, *optional*, defaults to self.unet.config.sample_size):
696
696
  The width in pixels of the generated image.
697
697
  eta (`float`, *optional*, defaults to 0.0):
698
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
699
- [`schedulers.DDIMScheduler`], will be ignored for others.
698
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
699
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
700
700
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
701
701
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
702
702
  to make generation deterministic.
@@ -917,9 +917,15 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
917
917
  image = latents
918
918
  else:
919
919
  latents = latents.to(self.vae.dtype)
920
+ torch_accelerator_module = getattr(torch, get_device(), torch.cuda)
921
+ oom_error = (
922
+ torch.OutOfMemoryError
923
+ if is_torch_version(">=", "2.5.0")
924
+ else torch_accelerator_module.OutOfMemoryError
925
+ )
920
926
  try:
921
927
  image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
922
- except torch.cuda.OutOfMemoryError as e:
928
+ except oom_error as e:
923
929
  warnings.warn(
924
930
  f"{e}. \n"
925
931
  f"Try to use VAE tiling for large images. For example: \n"
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -72,7 +72,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
72
72
  r"""
73
73
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
74
74
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
75
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
75
+ Flawed](https://huggingface.co/papers/2305.08891).
76
76
 
77
77
  Args:
78
78
  noise_cfg (`torch.Tensor`):
@@ -573,7 +573,7 @@ class StableDiffusionPAGPipeline(
573
573
  def prepare_extra_step_kwargs(self, generator, eta):
574
574
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
575
575
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
576
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
576
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
577
577
  # and should be between [0, 1]
578
578
 
579
579
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -724,7 +724,7 @@ class StableDiffusionPAGPipeline(
724
724
  return self._clip_skip
725
725
 
726
726
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
727
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
727
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
728
728
  # corresponds to doing no classifier free guidance.
729
729
  @property
730
730
  def do_classifier_free_guidance(self):
@@ -802,8 +802,8 @@ class StableDiffusionPAGPipeline(
802
802
  num_images_per_prompt (`int`, *optional*, defaults to 1):
803
803
  The number of images to generate per prompt.
804
804
  eta (`float`, *optional*, defaults to 0.0):
805
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
806
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
805
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
806
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
807
807
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
808
808
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
809
809
  generation deterministic.
@@ -833,7 +833,7 @@ class StableDiffusionPAGPipeline(
833
833
  [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
834
834
  guidance_rescale (`float`, *optional*, defaults to 0.0):
835
835
  Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
836
- Flawed](https://arxiv.org/pdf/2305.08891.pdf). Guidance rescale factor should fix overexposure when
836
+ Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
837
837
  using zero terminal SNR.
838
838
  clip_skip (`int`, *optional*):
839
839
  Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
@@ -1027,7 +1027,7 @@ class StableDiffusionPAGPipeline(
1027
1027
  noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
1028
1028
 
1029
1029
  if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
1030
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1030
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1031
1031
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
1032
1032
 
1033
1033
  # compute the previous noisy sample x_t -> x_t-1
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Stability AI and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Stability AI and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -663,7 +663,7 @@ class StableDiffusion3PAGPipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSin
663
663
  return self._clip_skip
664
664
 
665
665
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
666
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
666
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
667
667
  # corresponds to doing no classifier free guidance.
668
668
  @property
669
669
  def do_classifier_free_guidance(self):
@@ -738,11 +738,11 @@ class StableDiffusion3PAGPipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSin
738
738
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
739
739
  will be used.
740
740
  guidance_scale (`float`, *optional*, defaults to 7.0):
741
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
742
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
743
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
744
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
745
- usually at the expense of lower image quality.
741
+ Guidance scale as defined in [Classifier-Free Diffusion
742
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
743
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
744
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
745
+ the text `prompt`, usually at the expense of lower image quality.
746
746
  negative_prompt (`str` or `List[str]`, *optional*):
747
747
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
748
748
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Stability AI and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Stability AI and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -714,7 +714,7 @@ class StableDiffusion3PAGImg2ImgPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
714
714
  return self._clip_skip
715
715
 
716
716
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
717
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
717
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
718
718
  # corresponds to doing no classifier free guidance.
719
719
  @property
720
720
  def do_classifier_free_guidance(self):
@@ -799,11 +799,11 @@ class StableDiffusion3PAGImg2ImgPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
799
799
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
800
800
  will be used.
801
801
  guidance_scale (`float`, *optional*, defaults to 7.0):
802
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
803
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
804
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
805
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
806
- usually at the expense of lower image quality.
802
+ Guidance scale as defined in [Classifier-Free Diffusion
803
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
804
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
805
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
806
+ the text `prompt`, usually at the expense of lower image quality.
807
807
  negative_prompt (`str` or `List[str]`, *optional*):
808
808
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
809
809
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -438,7 +438,7 @@ class AnimateDiffPAGPipeline(
438
438
  def prepare_extra_step_kwargs(self, generator, eta):
439
439
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
440
440
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
441
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
441
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
442
442
  # and should be between [0, 1]
443
443
 
444
444
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -520,7 +520,7 @@ class AnimateDiffPAGPipeline(
520
520
  def prepare_latents(
521
521
  self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
522
522
  ):
523
- # If FreeNoise is enabled, generate latents as described in Equation (7) of [FreeNoise](https://arxiv.org/abs/2310.15169)
523
+ # If FreeNoise is enabled, generate latents as described in Equation (7) of [FreeNoise](https://huggingface.co/papers/2310.15169)
524
524
  if self.free_noise_enabled:
525
525
  latents = self._prepare_latents_free_noise(
526
526
  batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents
@@ -558,7 +558,7 @@ class AnimateDiffPAGPipeline(
558
558
  return self._clip_skip
559
559
 
560
560
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
561
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
561
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
562
562
  # corresponds to doing no classifier free guidance.
563
563
  @property
564
564
  def do_classifier_free_guidance(self):
@@ -624,8 +624,8 @@ class AnimateDiffPAGPipeline(
624
624
  The prompt or prompts to guide what to not include in image generation. If not defined, you need to
625
625
  pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
626
626
  eta (`float`, *optional*, defaults to 0.0):
627
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
628
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
627
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
628
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
629
629
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
630
630
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
631
631
  generation deterministic.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -568,7 +568,7 @@ class StableDiffusionPAGImg2ImgPipeline(
568
568
  def prepare_extra_step_kwargs(self, generator, eta):
569
569
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
570
570
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
571
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
571
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
572
572
  # and should be between [0, 1]
573
573
 
574
574
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -761,7 +761,7 @@ class StableDiffusionPAGImg2ImgPipeline(
761
761
  return self._clip_skip
762
762
 
763
763
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
764
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
764
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
765
765
  # corresponds to doing no classifier free guidance.
766
766
  @property
767
767
  def do_classifier_free_guidance(self):
@@ -847,8 +847,8 @@ class StableDiffusionPAGImg2ImgPipeline(
847
847
  num_images_per_prompt (`int`, *optional*, defaults to 1):
848
848
  The number of images to generate per prompt.
849
849
  eta (`float`, *optional*, defaults to 0.0):
850
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
851
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
850
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
851
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
852
852
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
853
853
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
854
854
  generation deterministic.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -99,7 +99,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
99
99
  r"""
100
100
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
101
101
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
102
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
102
+ Flawed](https://huggingface.co/papers/2305.08891).
103
103
 
104
104
  Args:
105
105
  noise_cfg (`torch.Tensor`):
@@ -603,7 +603,7 @@ class StableDiffusionPAGInpaintPipeline(
603
603
  def prepare_extra_step_kwargs(self, generator, eta):
604
604
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
605
605
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
606
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
606
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
607
607
  # and should be between [0, 1]
608
608
 
609
609
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -889,7 +889,7 @@ class StableDiffusionPAGInpaintPipeline(
889
889
  return self._clip_skip
890
890
 
891
891
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
892
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
892
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
893
893
  # corresponds to doing no classifier free guidance.
894
894
  @property
895
895
  def do_classifier_free_guidance(self):
@@ -972,8 +972,8 @@ class StableDiffusionPAGInpaintPipeline(
972
972
  num_images_per_prompt (`int`, *optional*, defaults to 1):
973
973
  The number of images to generate per prompt.
974
974
  eta (`float`, *optional*, defaults to 0.0):
975
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
976
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
975
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
976
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
977
977
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
978
978
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
979
979
  generation deterministic.
@@ -1003,7 +1003,7 @@ class StableDiffusionPAGInpaintPipeline(
1003
1003
  [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
1004
1004
  guidance_rescale (`float`, *optional*, defaults to 0.0):
1005
1005
  Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
1006
- Flawed](https://arxiv.org/pdf/2305.08891.pdf). Guidance rescale factor should fix overexposure when
1006
+ Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
1007
1007
  using zero terminal SNR.
1008
1008
  clip_skip (`int`, *optional*):
1009
1009
  Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
@@ -1294,7 +1294,7 @@ class StableDiffusionPAGInpaintPipeline(
1294
1294
  noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
1295
1295
 
1296
1296
  if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
1297
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1297
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1298
1298
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
1299
1299
 
1300
1300
  # compute the previous noisy sample x_t -> x_t-1
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -91,7 +91,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
91
91
  r"""
92
92
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
93
93
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
94
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
94
+ Flawed](https://huggingface.co/papers/2305.08891).
95
95
 
96
96
  Args:
97
97
  noise_cfg (`torch.Tensor`):
@@ -607,7 +607,7 @@ class StableDiffusionXLPAGPipeline(
607
607
  def prepare_extra_step_kwargs(self, generator, eta):
608
608
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
609
609
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
610
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
610
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
611
611
  # and should be between [0, 1]
612
612
 
613
613
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -823,7 +823,7 @@ class StableDiffusionXLPAGPipeline(
823
823
  return self._clip_skip
824
824
 
825
825
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
826
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
826
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
827
827
  # corresponds to doing no classifier free guidance.
828
828
  @property
829
829
  def do_classifier_free_guidance(self):
@@ -925,11 +925,11 @@ class StableDiffusionXLPAGPipeline(
925
925
  "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
926
926
  Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
927
927
  guidance_scale (`float`, *optional*, defaults to 5.0):
928
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
929
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
930
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
931
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
932
- usually at the expense of lower image quality.
928
+ Guidance scale as defined in [Classifier-Free Diffusion
929
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
930
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
931
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
932
+ the text `prompt`, usually at the expense of lower image quality.
933
933
  negative_prompt (`str` or `List[str]`, *optional*):
934
934
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
935
935
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -940,8 +940,8 @@ class StableDiffusionXLPAGPipeline(
940
940
  num_images_per_prompt (`int`, *optional*, defaults to 1):
941
941
  The number of images to generate per prompt.
942
942
  eta (`float`, *optional*, defaults to 0.0):
943
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
944
- [`schedulers.DDIMScheduler`], will be ignored for others.
943
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
944
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
945
945
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
946
946
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
947
947
  to make generation deterministic.
@@ -981,9 +981,10 @@ class StableDiffusionXLPAGPipeline(
981
981
  [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
982
982
  guidance_rescale (`float`, *optional*, defaults to 0.0):
983
983
  Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
984
- Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
985
- [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
986
- Guidance rescale factor should fix overexposure when using zero terminal SNR.
984
+ Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
985
+ [Common Diffusion Noise Schedules and Sample Steps are
986
+ Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
987
+ using zero terminal SNR.
987
988
  original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
988
989
  If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
989
990
  `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
@@ -1266,7 +1267,7 @@ class StableDiffusionXLPAGPipeline(
1266
1267
  noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
1267
1268
 
1268
1269
  if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
1269
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1270
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1270
1271
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
1271
1272
 
1272
1273
  # compute the previous noisy sample x_t -> x_t-1
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -49,7 +49,7 @@ from ...utils import (
49
49
  scale_lora_layers,
50
50
  unscale_lora_layers,
51
51
  )
52
- from ...utils.torch_utils import randn_tensor
52
+ from ...utils.torch_utils import empty_device_cache, randn_tensor
53
53
  from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
54
54
  from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
55
55
  from .pag_utils import PAGMixin
@@ -95,7 +95,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
95
95
  r"""
96
96
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
97
97
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
98
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
98
+ Flawed](https://huggingface.co/papers/2305.08891).
99
99
 
100
100
  Args:
101
101
  noise_cfg (`torch.Tensor`):
@@ -553,7 +553,7 @@ class StableDiffusionXLPAGImg2ImgPipeline(
553
553
  def prepare_extra_step_kwargs(self, generator, eta):
554
554
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
555
555
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
556
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
556
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
557
557
  # and should be between [0, 1]
558
558
 
559
559
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -716,7 +716,7 @@ class StableDiffusionXLPAGImg2ImgPipeline(
716
716
  # Offload text encoder if `enable_model_cpu_offload` was enabled
717
717
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
718
718
  self.text_encoder_2.to("cpu")
719
- torch.cuda.empty_cache()
719
+ empty_device_cache()
720
720
 
721
721
  image = image.to(device=device, dtype=dtype)
722
722
 
@@ -970,7 +970,7 @@ class StableDiffusionXLPAGImg2ImgPipeline(
970
970
  return self._clip_skip
971
971
 
972
972
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
973
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
973
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
974
974
  # corresponds to doing no classifier free guidance.
975
975
  @property
976
976
  def do_classifier_free_guidance(self):
@@ -1088,11 +1088,11 @@ class StableDiffusionXLPAGImg2ImgPipeline(
1088
1088
  forms a part of a "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refine Image
1089
1089
  Quality**](https://huggingface.co/docs/diffusers/using-diffusers/sdxl#refine-image-quality).
1090
1090
  guidance_scale (`float`, *optional*, defaults to 7.5):
1091
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
1092
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
1093
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
1094
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
1095
- usually at the expense of lower image quality.
1091
+ Guidance scale as defined in [Classifier-Free Diffusion
1092
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
1093
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
1094
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
1095
+ the text `prompt`, usually at the expense of lower image quality.
1096
1096
  negative_prompt (`str` or `List[str]`, *optional*):
1097
1097
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
1098
1098
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -1103,8 +1103,8 @@ class StableDiffusionXLPAGImg2ImgPipeline(
1103
1103
  num_images_per_prompt (`int`, *optional*, defaults to 1):
1104
1104
  The number of images to generate per prompt.
1105
1105
  eta (`float`, *optional*, defaults to 0.0):
1106
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
1107
- [`schedulers.DDIMScheduler`], will be ignored for others.
1106
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
1107
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
1108
1108
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
1109
1109
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
1110
1110
  to make generation deterministic.
@@ -1144,9 +1144,10 @@ class StableDiffusionXLPAGImg2ImgPipeline(
1144
1144
  [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
1145
1145
  guidance_rescale (`float`, *optional*, defaults to 0.0):
1146
1146
  Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
1147
- Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
1148
- [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
1149
- Guidance rescale factor should fix overexposure when using zero terminal SNR.
1147
+ Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
1148
+ [Common Diffusion Noise Schedules and Sample Steps are
1149
+ Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
1150
+ using zero terminal SNR.
1150
1151
  original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
1151
1152
  If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
1152
1153
  `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
@@ -1461,7 +1462,7 @@ class StableDiffusionXLPAGImg2ImgPipeline(
1461
1462
  noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
1462
1463
 
1463
1464
  if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
1464
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1465
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1465
1466
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
1466
1467
 
1467
1468
  # compute the previous noisy sample x_t -> x_t-1
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -108,7 +108,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
108
108
  r"""
109
109
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
110
110
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
111
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
111
+ Flawed](https://huggingface.co/papers/2305.08891).
112
112
 
113
113
  Args:
114
114
  noise_cfg (`torch.Tensor`):
@@ -643,7 +643,7 @@ class StableDiffusionXLPAGInpaintPipeline(
643
643
  def prepare_extra_step_kwargs(self, generator, eta):
644
644
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
645
645
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
646
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
646
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
647
647
  # and should be between [0, 1]
648
648
 
649
649
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -1061,7 +1061,7 @@ class StableDiffusionXLPAGInpaintPipeline(
1061
1061
  return self._clip_skip
1062
1062
 
1063
1063
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
1064
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
1064
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
1065
1065
  # corresponds to doing no classifier free guidance.
1066
1066
  @property
1067
1067
  def do_classifier_free_guidance(self):
@@ -1208,11 +1208,11 @@ class StableDiffusionXLPAGInpaintPipeline(
1208
1208
  forms a part of a "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
1209
1209
  Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output).
1210
1210
  guidance_scale (`float`, *optional*, defaults to 7.5):
1211
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
1212
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
1213
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
1214
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
1215
- usually at the expense of lower image quality.
1211
+ Guidance scale as defined in [Classifier-Free Diffusion
1212
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
1213
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
1214
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
1215
+ the text `prompt`, usually at the expense of lower image quality.
1216
1216
  negative_prompt (`str` or `List[str]`, *optional*):
1217
1217
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
1218
1218
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -1243,8 +1243,8 @@ class StableDiffusionXLPAGInpaintPipeline(
1243
1243
  num_images_per_prompt (`int`, *optional*, defaults to 1):
1244
1244
  The number of images to generate per prompt.
1245
1245
  eta (`float`, *optional*, defaults to 0.0):
1246
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
1247
- [`schedulers.DDIMScheduler`], will be ignored for others.
1246
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
1247
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
1248
1248
  generator (`torch.Generator`, *optional*):
1249
1249
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
1250
1250
  to make generation deterministic.
@@ -1673,7 +1673,7 @@ class StableDiffusionXLPAGInpaintPipeline(
1673
1673
  noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
1674
1674
 
1675
1675
  if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
1676
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1676
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1677
1677
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
1678
1678
 
1679
1679
  # compute the previous noisy sample x_t -> x_t-1
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.