diffusers 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
Files changed (478)
  1. diffusers/__init__.py +48 -1
  2. diffusers/commands/__init__.py +1 -1
  3. diffusers/commands/diffusers_cli.py +1 -1
  4. diffusers/commands/env.py +1 -1
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/dependency_versions_check.py +1 -1
  7. diffusers/dependency_versions_table.py +1 -1
  8. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  9. diffusers/hooks/faster_cache.py +2 -2
  10. diffusers/hooks/group_offloading.py +128 -29
  11. diffusers/hooks/hooks.py +2 -2
  12. diffusers/hooks/layerwise_casting.py +3 -3
  13. diffusers/hooks/pyramid_attention_broadcast.py +1 -1
  14. diffusers/image_processor.py +7 -2
  15. diffusers/loaders/__init__.py +4 -0
  16. diffusers/loaders/ip_adapter.py +5 -14
  17. diffusers/loaders/lora_base.py +212 -111
  18. diffusers/loaders/lora_conversion_utils.py +275 -34
  19. diffusers/loaders/lora_pipeline.py +1554 -819
  20. diffusers/loaders/peft.py +52 -109
  21. diffusers/loaders/single_file.py +2 -2
  22. diffusers/loaders/single_file_model.py +20 -4
  23. diffusers/loaders/single_file_utils.py +225 -5
  24. diffusers/loaders/textual_inversion.py +3 -2
  25. diffusers/loaders/transformer_flux.py +1 -1
  26. diffusers/loaders/transformer_sd3.py +2 -2
  27. diffusers/loaders/unet.py +2 -16
  28. diffusers/loaders/unet_loader_utils.py +1 -1
  29. diffusers/loaders/utils.py +1 -1
  30. diffusers/models/__init__.py +15 -1
  31. diffusers/models/activations.py +5 -5
  32. diffusers/models/adapter.py +2 -3
  33. diffusers/models/attention.py +4 -4
  34. diffusers/models/attention_flax.py +10 -10
  35. diffusers/models/attention_processor.py +14 -10
  36. diffusers/models/auto_model.py +47 -10
  37. diffusers/models/autoencoders/__init__.py +1 -0
  38. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  39. diffusers/models/autoencoders/autoencoder_dc.py +3 -3
  40. diffusers/models/autoencoders/autoencoder_kl.py +4 -4
  41. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  42. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  43. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
  44. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  45. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  46. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  47. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  48. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  49. diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
  50. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  52. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  53. diffusers/models/autoencoders/vae.py +13 -2
  54. diffusers/models/autoencoders/vq_model.py +2 -2
  55. diffusers/models/cache_utils.py +1 -1
  56. diffusers/models/controlnet.py +1 -1
  57. diffusers/models/controlnet_flux.py +1 -1
  58. diffusers/models/controlnet_sd3.py +1 -1
  59. diffusers/models/controlnet_sparsectrl.py +1 -1
  60. diffusers/models/controlnets/__init__.py +1 -0
  61. diffusers/models/controlnets/controlnet.py +3 -3
  62. diffusers/models/controlnets/controlnet_flax.py +1 -1
  63. diffusers/models/controlnets/controlnet_flux.py +16 -15
  64. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  65. diffusers/models/controlnets/controlnet_sana.py +290 -0
  66. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  67. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  68. diffusers/models/controlnets/controlnet_union.py +1 -1
  69. diffusers/models/controlnets/controlnet_xs.py +7 -7
  70. diffusers/models/controlnets/multicontrolnet.py +4 -5
  71. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  72. diffusers/models/downsampling.py +2 -2
  73. diffusers/models/embeddings.py +10 -12
  74. diffusers/models/embeddings_flax.py +2 -2
  75. diffusers/models/lora.py +3 -3
  76. diffusers/models/modeling_utils.py +44 -14
  77. diffusers/models/normalization.py +4 -4
  78. diffusers/models/resnet.py +2 -2
  79. diffusers/models/resnet_flax.py +1 -1
  80. diffusers/models/transformers/__init__.py +5 -0
  81. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  82. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  83. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  84. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  85. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  86. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  87. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  88. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  89. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  90. diffusers/models/transformers/prior_transformer.py +1 -1
  91. diffusers/models/transformers/sana_transformer.py +8 -3
  92. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  93. diffusers/models/transformers/t5_film_transformer.py +3 -3
  94. diffusers/models/transformers/transformer_2d.py +1 -1
  95. diffusers/models/transformers/transformer_allegro.py +1 -1
  96. diffusers/models/transformers/transformer_chroma.py +742 -0
  97. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  98. diffusers/models/transformers/transformer_cogview4.py +317 -25
  99. diffusers/models/transformers/transformer_cosmos.py +579 -0
  100. diffusers/models/transformers/transformer_flux.py +9 -11
  101. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  102. diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
  103. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  104. diffusers/models/transformers/transformer_ltx.py +2 -2
  105. diffusers/models/transformers/transformer_lumina2.py +1 -1
  106. diffusers/models/transformers/transformer_mochi.py +1 -1
  107. diffusers/models/transformers/transformer_omnigen.py +2 -2
  108. diffusers/models/transformers/transformer_sd3.py +7 -7
  109. diffusers/models/transformers/transformer_temporal.py +1 -1
  110. diffusers/models/transformers/transformer_wan.py +24 -8
  111. diffusers/models/transformers/transformer_wan_vace.py +393 -0
  112. diffusers/models/unets/unet_1d.py +1 -1
  113. diffusers/models/unets/unet_1d_blocks.py +1 -1
  114. diffusers/models/unets/unet_2d.py +1 -1
  115. diffusers/models/unets/unet_2d_blocks.py +1 -1
  116. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  117. diffusers/models/unets/unet_2d_condition.py +2 -2
  118. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  119. diffusers/models/unets/unet_3d_blocks.py +1 -1
  120. diffusers/models/unets/unet_3d_condition.py +3 -3
  121. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  122. diffusers/models/unets/unet_kandinsky3.py +1 -1
  123. diffusers/models/unets/unet_motion_model.py +2 -2
  124. diffusers/models/unets/unet_stable_cascade.py +1 -1
  125. diffusers/models/upsampling.py +2 -2
  126. diffusers/models/vae_flax.py +2 -2
  127. diffusers/models/vq_model.py +1 -1
  128. diffusers/pipelines/__init__.py +37 -6
  129. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  130. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  131. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  132. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  133. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  134. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  135. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  136. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  137. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  138. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  139. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  140. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  141. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
  142. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  143. diffusers/pipelines/auto_pipeline.py +6 -7
  144. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  145. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  146. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  147. diffusers/pipelines/chroma/__init__.py +49 -0
  148. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  149. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  150. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  151. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
  152. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
  153. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
  154. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
  155. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  156. diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
  157. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  158. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  159. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  160. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  161. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  162. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
  163. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  164. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  165. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  166. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  167. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  168. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
  169. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
  170. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
  171. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  172. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  173. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  174. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  175. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  176. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  177. diffusers/pipelines/cosmos/__init__.py +54 -0
  178. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  179. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  180. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  181. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  182. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  183. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  184. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  185. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  186. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  187. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  188. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  189. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  190. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  191. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  192. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  193. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  194. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  195. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  196. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  197. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  198. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  199. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  200. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  201. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  202. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  203. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  204. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
  205. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  206. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  207. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  208. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  209. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  210. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  211. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  212. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  213. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  214. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  215. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  216. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  217. diffusers/pipelines/dit/pipeline_dit.py +1 -1
  218. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  219. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  220. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  221. diffusers/pipelines/flux/modeling_flux.py +1 -1
  222. diffusers/pipelines/flux/pipeline_flux.py +10 -17
  223. diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
  224. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
  225. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
  226. diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
  227. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
  228. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
  229. diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
  230. diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
  231. diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
  232. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  233. diffusers/pipelines/free_init_utils.py +2 -2
  234. diffusers/pipelines/free_noise_utils.py +3 -3
  235. diffusers/pipelines/hidream_image/__init__.py +47 -0
  236. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  237. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  238. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  239. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  240. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
  241. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  242. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  243. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  244. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  245. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  246. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  247. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  248. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  249. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  250. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  251. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  252. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  253. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  254. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  255. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  256. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  257. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  258. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  259. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  260. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  261. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  262. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  263. diffusers/pipelines/kolors/text_encoder.py +3 -3
  264. diffusers/pipelines/kolors/tokenizer.py +1 -1
  265. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  266. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  267. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  268. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  269. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  270. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  271. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  272. diffusers/pipelines/ltx/__init__.py +4 -0
  273. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  274. diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
  275. diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
  276. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
  277. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  278. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  279. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  280. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  281. diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
  282. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  283. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  284. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  285. diffusers/pipelines/onnx_utils.py +15 -2
  286. diffusers/pipelines/pag/pag_utils.py +2 -2
  287. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  288. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  289. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  290. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  291. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  292. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  293. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  294. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  295. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  296. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  297. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  298. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  299. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  300. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  301. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  302. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  303. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  304. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  305. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  306. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  307. diffusers/pipelines/pipeline_flax_utils.py +3 -4
  308. diffusers/pipelines/pipeline_loading_utils.py +89 -13
  309. diffusers/pipelines/pipeline_utils.py +105 -33
  310. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
  311. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
  312. diffusers/pipelines/sana/__init__.py +4 -0
  313. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  314. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  315. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  316. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  317. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  318. diffusers/pipelines/shap_e/camera.py +1 -1
  319. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  320. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  321. diffusers/pipelines/shap_e/renderer.py +3 -3
  322. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  323. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  324. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  325. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  326. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  327. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  328. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  329. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  330. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  331. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  332. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  333. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
  334. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  335. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
  336. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
  337. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
  338. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  339. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  340. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  341. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  342. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  343. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  344. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  345. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  346. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  347. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  348. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  349. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  350. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
  351. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  352. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  353. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  354. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  355. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  356. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  357. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  358. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  359. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  360. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  361. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  362. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  363. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  364. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  365. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  366. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  367. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  368. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  369. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  370. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  371. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  372. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  373. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  374. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  375. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  376. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  377. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  378. diffusers/pipelines/unclip/text_proj.py +2 -2
  379. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  380. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  381. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  382. diffusers/pipelines/visualcloze/__init__.py +52 -0
  383. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  384. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  385. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  386. diffusers/pipelines/wan/__init__.py +2 -0
  387. diffusers/pipelines/wan/pipeline_wan.py +17 -12
  388. diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
  389. diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
  390. diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
  391. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  392. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  393. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  394. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  395. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  396. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  397. diffusers/quantizers/__init__.py +179 -1
  398. diffusers/quantizers/base.py +6 -1
  399. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  400. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  401. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  402. diffusers/quantizers/gguf/utils.py +16 -13
  403. diffusers/quantizers/quantization_config.py +18 -16
  404. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  405. diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
  406. diffusers/schedulers/__init__.py +3 -1
  407. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  408. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  409. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  410. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  411. diffusers/schedulers/scheduling_ddim.py +8 -8
  412. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  413. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  414. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  415. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  416. diffusers/schedulers/scheduling_ddpm.py +9 -9
  417. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  418. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  419. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  420. diffusers/schedulers/scheduling_deis_multistep.py +8 -8
  421. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  422. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
  423. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  424. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  425. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  426. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
  427. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  428. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  429. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  430. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  431. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  432. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  433. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  434. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  435. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  436. diffusers/schedulers/scheduling_ipndm.py +2 -2
  437. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  438. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  439. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  440. diffusers/schedulers/scheduling_lcm.py +3 -3
  441. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  442. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  443. diffusers/schedulers/scheduling_pndm.py +4 -4
  444. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  445. diffusers/schedulers/scheduling_repaint.py +9 -9
  446. diffusers/schedulers/scheduling_sasolver.py +15 -15
  447. diffusers/schedulers/scheduling_scm.py +1 -1
  448. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  449. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  450. diffusers/schedulers/scheduling_tcd.py +3 -3
  451. diffusers/schedulers/scheduling_unclip.py +5 -5
  452. diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
  453. diffusers/schedulers/scheduling_utils.py +1 -1
  454. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  455. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  456. diffusers/training_utils.py +13 -5
  457. diffusers/utils/__init__.py +5 -0
  458. diffusers/utils/accelerate_utils.py +1 -1
  459. diffusers/utils/doc_utils.py +1 -1
  460. diffusers/utils/dummy_pt_objects.py +120 -0
  461. diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
  462. diffusers/utils/dynamic_modules_utils.py +21 -3
  463. diffusers/utils/export_utils.py +1 -1
  464. diffusers/utils/import_utils.py +81 -18
  465. diffusers/utils/logging.py +1 -1
  466. diffusers/utils/outputs.py +2 -1
  467. diffusers/utils/peft_utils.py +91 -8
  468. diffusers/utils/state_dict_utils.py +20 -3
  469. diffusers/utils/testing_utils.py +59 -7
  470. diffusers/utils/torch_utils.py +25 -5
  471. diffusers/video_processor.py +2 -2
  472. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
  473. diffusers-0.34.0.dist-info/RECORD +639 -0
  474. diffusers-0.33.0.dist-info/RECORD +0 -608
  475. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
  476. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
  477. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
  478. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Susung Hong and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Susung Hong and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -34,7 +34,7 @@ from ...utils import (
34
34
  unscale_lora_layers,
35
35
  )
36
36
  from ...utils.torch_utils import randn_tensor
37
- from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
37
+ from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
38
38
  from ..stable_diffusion import StableDiffusionPipelineOutput
39
39
  from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
40
40
 
@@ -106,8 +106,12 @@ class CrossAttnStoreProcessor:
106
106
  return hidden_states
107
107
 
108
108
 
109
- # Modified to get self-attention guidance scale in this paper (https://arxiv.org/pdf/2210.00939.pdf) as an input
110
- class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin):
109
+ # Modified to get self-attention guidance scale in this paper (https://huggingface.co/papers/2210.00939) as an input
110
+ class StableDiffusionSAGPipeline(
111
+ DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin
112
+ ):
113
+ _last_supported_version = "0.33.1"
114
+
111
115
  r"""
112
116
  Pipeline for text-to-image generation using Stable Diffusion.
113
117
 
@@ -476,7 +480,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
476
480
  def prepare_extra_step_kwargs(self, generator, eta):
477
481
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
478
482
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
479
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
483
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
480
484
  # and should be between [0, 1]
481
485
 
482
486
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -616,8 +620,8 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
616
620
  num_images_per_prompt (`int`, *optional*, defaults to 1):
617
621
  The number of images to generate per prompt.
618
622
  eta (`float`, *optional*, defaults to 0.0):
619
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
620
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
623
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
624
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
621
625
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
622
626
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
623
627
  generation deterministic.
@@ -681,11 +685,11 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
681
685
 
682
686
  device = self._execution_device
683
687
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
684
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
688
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
685
689
  # corresponds to doing no classifier free guidance.
686
690
  do_classifier_free_guidance = guidance_scale > 1.0
687
691
  # and `sag_scale` is` `s` of equation (16)
688
- # of the self-attention guidance paper: https://arxiv.org/pdf/2210.00939.pdf
692
+ # of the self-attention guidance paper: https://huggingface.co/papers/2210.00939
689
693
  # `sag_scale = 0` means no self-attention guidance
690
694
  do_self_attention_guidance = sag_scale > 0.0
691
695
 
@@ -802,7 +806,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
802
806
  if do_self_attention_guidance:
803
807
  # classifier-free guidance produces two chunks of attention map
804
808
  # and we only use unconditional one according to equation (25)
805
- # in https://arxiv.org/pdf/2210.00939.pdf
809
+ # in https://huggingface.co/papers/2210.00939
806
810
  if do_classifier_free_guidance:
807
811
  # DDIM-like prediction of x0
808
812
  pred_x0 = self.pred_x0(latents, noise_pred_uncond, t)
@@ -876,7 +880,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
876
880
  return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
877
881
 
878
882
  def sag_masking(self, original_latents, attn_map, map_size, t, eps):
879
- # Same masking process as in SAG paper: https://arxiv.org/pdf/2210.00939.pdf
883
+ # Same masking process as in SAG paper: https://huggingface.co/papers/2210.00939
880
884
  bh, hw1, hw2 = attn_map.shape
881
885
  b, latent_channel, latent_h, latent_w = original_latents.shape
882
886
  h = self.unet.config.attention_head_dim
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -90,7 +90,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
90
90
  r"""
91
91
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
92
92
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
93
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
93
+ Flawed](https://huggingface.co/papers/2305.08891).
94
94
 
95
95
  Args:
96
96
  noise_cfg (`torch.Tensor`):
@@ -598,7 +598,7 @@ class StableDiffusionXLPipeline(
598
598
  def prepare_extra_step_kwargs(self, generator, eta):
599
599
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
600
600
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
601
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
601
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
602
602
  # and should be between [0, 1]
603
603
 
604
604
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -811,7 +811,7 @@ class StableDiffusionXLPipeline(
811
811
  return self._clip_skip
812
812
 
813
813
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
814
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
814
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
815
815
  # corresponds to doing no classifier free guidance.
816
816
  @property
817
817
  def do_classifier_free_guidance(self):
@@ -914,11 +914,11 @@ class StableDiffusionXLPipeline(
914
914
  "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
915
915
  Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
916
916
  guidance_scale (`float`, *optional*, defaults to 5.0):
917
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
918
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
919
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
920
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
921
- usually at the expense of lower image quality.
917
+ Guidance scale as defined in [Classifier-Free Diffusion
918
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
919
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
920
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
921
+ the text `prompt`, usually at the expense of lower image quality.
922
922
  negative_prompt (`str` or `List[str]`, *optional*):
923
923
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
924
924
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -929,8 +929,8 @@ class StableDiffusionXLPipeline(
929
929
  num_images_per_prompt (`int`, *optional*, defaults to 1):
930
930
  The number of images to generate per prompt.
931
931
  eta (`float`, *optional*, defaults to 0.0):
932
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
933
- [`schedulers.DDIMScheduler`], will be ignored for others.
932
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
933
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
934
934
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
935
935
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
936
936
  to make generation deterministic.
@@ -970,9 +970,10 @@ class StableDiffusionXLPipeline(
970
970
  [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
971
971
  guidance_rescale (`float`, *optional*, defaults to 0.0):
972
972
  Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
973
- Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
974
- [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
975
- Guidance rescale factor should fix overexposure when using zero terminal SNR.
973
+ Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
974
+ [Common Diffusion Noise Schedules and Sample Steps are
975
+ Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
976
+ using zero terminal SNR.
976
977
  original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
977
978
  If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
978
979
  `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
@@ -1229,7 +1230,7 @@ class StableDiffusionXLPipeline(
1229
1230
  noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
1230
1231
 
1231
1232
  if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
1232
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1233
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1233
1234
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
1234
1235
 
1235
1236
  # compute the previous noisy sample x_t -> x_t-1
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -50,7 +50,7 @@ from ...utils import (
50
50
  scale_lora_layers,
51
51
  unscale_lora_layers,
52
52
  )
53
- from ...utils.torch_utils import randn_tensor
53
+ from ...utils.torch_utils import empty_device_cache, randn_tensor
54
54
  from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
55
55
  from .pipeline_output import StableDiffusionXLPipelineOutput
56
56
 
@@ -93,7 +93,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
93
93
  r"""
94
94
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
95
95
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
96
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
96
+ Flawed](https://huggingface.co/papers/2305.08891).
97
97
 
98
98
  Args:
99
99
  noise_cfg (`torch.Tensor`):
@@ -544,7 +544,7 @@ class StableDiffusionXLImg2ImgPipeline(
544
544
  def prepare_extra_step_kwargs(self, generator, eta):
545
545
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
546
546
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
547
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
547
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
548
548
  # and should be between [0, 1]
549
549
 
550
550
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -704,7 +704,7 @@ class StableDiffusionXLImg2ImgPipeline(
704
704
  # Offload text encoder if `enable_model_cpu_offload` was enabled
705
705
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
706
706
  self.text_encoder_2.to("cpu")
707
- torch.cuda.empty_cache()
707
+ empty_device_cache()
708
708
 
709
709
  image = image.to(device=device, dtype=dtype)
710
710
 
@@ -957,7 +957,7 @@ class StableDiffusionXLImg2ImgPipeline(
957
957
  return self._clip_skip
958
958
 
959
959
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
960
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
960
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
961
961
  # corresponds to doing no classifier free guidance.
962
962
  @property
963
963
  def do_classifier_free_guidance(self):
@@ -1074,11 +1074,11 @@ class StableDiffusionXLImg2ImgPipeline(
1074
1074
  forms a part of a "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refine Image
1075
1075
  Quality**](https://huggingface.co/docs/diffusers/using-diffusers/sdxl#refine-image-quality).
1076
1076
  guidance_scale (`float`, *optional*, defaults to 7.5):
1077
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
1078
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
1079
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
1080
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
1081
- usually at the expense of lower image quality.
1077
+ Guidance scale as defined in [Classifier-Free Diffusion
1078
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
1079
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
1080
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
1081
+ the text `prompt`, usually at the expense of lower image quality.
1082
1082
  negative_prompt (`str` or `List[str]`, *optional*):
1083
1083
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
1084
1084
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -1089,8 +1089,8 @@ class StableDiffusionXLImg2ImgPipeline(
1089
1089
  num_images_per_prompt (`int`, *optional*, defaults to 1):
1090
1090
  The number of images to generate per prompt.
1091
1091
  eta (`float`, *optional*, defaults to 0.0):
1092
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
1093
- [`schedulers.DDIMScheduler`], will be ignored for others.
1092
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
1093
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
1094
1094
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
1095
1095
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
1096
1096
  to make generation deterministic.
@@ -1130,9 +1130,10 @@ class StableDiffusionXLImg2ImgPipeline(
1130
1130
  [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
1131
1131
  guidance_rescale (`float`, *optional*, defaults to 0.0):
1132
1132
  Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
1133
- Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
1134
- [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
1135
- Guidance rescale factor should fix overexposure when using zero terminal SNR.
1133
+ Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
1134
+ [Common Diffusion Noise Schedules and Sample Steps are
1135
+ Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
1136
+ using zero terminal SNR.
1136
1137
  original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
1137
1138
  If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
1138
1139
  `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
@@ -1420,7 +1421,7 @@ class StableDiffusionXLImg2ImgPipeline(
1420
1421
  noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
1421
1422
 
1422
1423
  if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
1423
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1424
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1424
1425
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
1425
1426
 
1426
1427
  # compute the previous noisy sample x_t -> x_t-1
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -104,7 +104,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
104
104
  r"""
105
105
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
106
106
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
107
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
107
+ Flawed](https://huggingface.co/papers/2305.08891).
108
108
 
109
109
  Args:
110
110
  noise_cfg (`torch.Tensor`):
@@ -648,7 +648,7 @@ class StableDiffusionXLInpaintPipeline(
648
648
  def prepare_extra_step_kwargs(self, generator, eta):
649
649
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
650
650
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
651
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
651
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
652
652
  # and should be between [0, 1]
653
653
 
654
654
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -1062,7 +1062,7 @@ class StableDiffusionXLInpaintPipeline(
1062
1062
  return self._clip_skip
1063
1063
 
1064
1064
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
1065
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
1065
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
1066
1066
  # corresponds to doing no classifier free guidance.
1067
1067
  @property
1068
1068
  def do_classifier_free_guidance(self):
@@ -1208,11 +1208,11 @@ class StableDiffusionXLInpaintPipeline(
1208
1208
  forms a part of a "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
1209
1209
  Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output).
1210
1210
  guidance_scale (`float`, *optional*, defaults to 7.5):
1211
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
1212
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
1213
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
1214
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
1215
- usually at the expense of lower image quality.
1211
+ Guidance scale as defined in [Classifier-Free Diffusion
1212
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
1213
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
1214
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
1215
+ the text `prompt`, usually at the expense of lower image quality.
1216
1216
  negative_prompt (`str` or `List[str]`, *optional*):
1217
1217
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
1218
1218
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -1243,8 +1243,8 @@ class StableDiffusionXLInpaintPipeline(
1243
1243
  num_images_per_prompt (`int`, *optional*, defaults to 1):
1244
1244
  The number of images to generate per prompt.
1245
1245
  eta (`float`, *optional*, defaults to 0.0):
1246
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
1247
- [`schedulers.DDIMScheduler`], will be ignored for others.
1246
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
1247
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
1248
1248
  generator (`torch.Generator`, *optional*):
1249
1249
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
1250
1250
  to make generation deterministic.
@@ -1638,7 +1638,7 @@ class StableDiffusionXLInpaintPipeline(
1638
1638
  noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
1639
1639
 
1640
1640
  if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
1641
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1641
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1642
1642
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
1643
1643
 
1644
1644
  # compute the previous noisy sample x_t -> x_t-1
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Harutatsu Akiyama and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Harutatsu Akiyama and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -104,7 +104,7 @@ def retrieve_latents(
104
104
  def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
105
105
  """
106
106
  Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
107
- Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
107
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891). See Section 3.4
108
108
  """
109
109
  std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
110
110
  std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
@@ -427,7 +427,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
427
427
  def prepare_extra_step_kwargs(self, generator, eta):
428
428
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
429
429
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
430
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
430
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
431
431
  # and should be between [0, 1]
432
432
 
433
433
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -667,11 +667,11 @@ class StableDiffusionXLInstructPix2PixPipeline(
667
667
  "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
668
668
  Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
669
669
  guidance_scale (`float`, *optional*, defaults to 5.0):
670
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
671
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
672
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
673
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
674
- usually at the expense of lower image quality.
670
+ Guidance scale as defined in [Classifier-Free Diffusion
671
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
672
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
673
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
674
+ the text `prompt`, usually at the expense of lower image quality.
675
675
  image_guidance_scale (`float`, *optional*, defaults to 1.5):
676
676
  Image guidance scale is to push the generated image towards the initial image `image`. Image guidance
677
677
  scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to
@@ -687,8 +687,8 @@ class StableDiffusionXLInstructPix2PixPipeline(
687
687
  num_images_per_prompt (`int`, *optional*, defaults to 1):
688
688
  The number of images to generate per prompt.
689
689
  eta (`float`, *optional*, defaults to 0.0):
690
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
691
- [`schedulers.DDIMScheduler`], will be ignored for others.
690
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
691
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
692
692
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
693
693
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
694
694
  to make generation deterministic.
@@ -728,9 +728,10 @@ class StableDiffusionXLInstructPix2PixPipeline(
728
728
  [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
729
729
  guidance_rescale (`float`, *optional*, defaults to 0.0):
730
730
  Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
731
- Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
732
- [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
733
- Guidance rescale factor should fix overexposure when using zero terminal SNR.
731
+ Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
732
+ [Common Diffusion Noise Schedules and Sample Steps are
733
+ Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
734
+ using zero terminal SNR.
734
735
  original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
735
736
  If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
736
737
  `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
@@ -785,7 +786,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
785
786
  device = self._execution_device
786
787
 
787
788
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
788
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
789
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
789
790
  # corresponds to doing no classifier free guidance.
790
791
  do_classifier_free_guidance = guidance_scale > 1.0 and image_guidance_scale >= 1.0
791
792
 
@@ -928,7 +929,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
928
929
  )
929
930
 
930
931
  if do_classifier_free_guidance and guidance_rescale > 0.0:
931
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
932
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
932
933
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
933
934
 
934
935
  # compute the previous noisy sample x_t -> x_t-1
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -369,7 +369,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
369
369
  return self._guidance_scale
370
370
 
371
371
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
372
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
372
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
373
373
  # corresponds to doing no classifier free guidance.
374
374
  @property
375
375
  def do_classifier_free_guidance(self):
@@ -495,7 +495,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
495
495
  batch_size = image.shape[0]
496
496
  device = self._execution_device
497
497
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
498
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
498
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
499
499
  # corresponds to doing no classifier free guidance.
500
500
  self._guidance_scale = max_guidance_scale
501
501
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 TencentARC and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 TencentARC and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -123,7 +123,7 @@ def _preprocess_adapter_image(image, height, width):
123
123
  image = torch.cat(image, dim=0)
124
124
  else:
125
125
  raise ValueError(
126
- f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}"
126
+ f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}"
127
127
  )
128
128
  return image
129
129
 
@@ -191,7 +191,7 @@ def retrieve_timesteps(
191
191
  class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin, FromSingleFileMixin):
192
192
  r"""
193
193
  Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
194
- https://arxiv.org/abs/2302.08453
194
+ https://huggingface.co/papers/2302.08453
195
195
 
196
196
  This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
197
197
  library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
@@ -521,7 +521,7 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin, Fr
521
521
  def prepare_extra_step_kwargs(self, generator, eta):
522
522
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
523
523
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
524
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
524
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
525
525
  # and should be between [0, 1]
526
526
 
527
527
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -680,7 +680,7 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin, Fr
680
680
  return self._guidance_scale
681
681
 
682
682
  # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
683
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
683
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
684
684
  # corresponds to doing no classifier free guidance.
685
685
  @property
686
686
  def do_classifier_free_guidance(self):
@@ -740,11 +740,11 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin, Fr
740
740
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
741
741
  will be used.
742
742
  guidance_scale (`float`, *optional*, defaults to 7.5):
743
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
744
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
745
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
746
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
747
- usually at the expense of lower image quality.
743
+ Guidance scale as defined in [Classifier-Free Diffusion
744
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
745
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
746
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
747
+ the text `prompt`, usually at the expense of lower image quality.
748
748
  negative_prompt (`str` or `List[str]`, *optional*):
749
749
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
750
750
  `negative_prompt_embeds` instead.
@@ -752,8 +752,8 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin, Fr
752
752
  num_images_per_prompt (`int`, *optional*, defaults to 1):
753
753
  The number of images to generate per prompt.
754
754
  eta (`float`, *optional*, defaults to 0.0):
755
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
756
- [`schedulers.DDIMScheduler`], will be ignored for others.
755
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
756
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
757
757
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
758
758
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
759
759
  to make generation deterministic.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 TencentARC and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 TencentARC and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -121,7 +121,7 @@ def _preprocess_adapter_image(image, height, width):
121
121
  image = torch.cat(image, dim=0)
122
122
  else:
123
123
  raise ValueError(
124
- f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}"
124
+ f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}"
125
125
  )
126
126
  return image
127
127
 
@@ -131,7 +131,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
131
131
  r"""
132
132
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
133
133
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
134
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
134
+ Flawed](https://huggingface.co/papers/2305.08891).
135
135
 
136
136
  Args:
137
137
  noise_cfg (`torch.Tensor`):
@@ -223,7 +223,7 @@ class StableDiffusionXLAdapterPipeline(
223
223
  ):
224
224
  r"""
225
225
  Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
226
- https://arxiv.org/abs/2302.08453
226
+ https://huggingface.co/papers/2302.08453
227
227
 
228
228
  This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
229
229
  library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
@@ -624,7 +624,7 @@ class StableDiffusionXLAdapterPipeline(
624
624
  def prepare_extra_step_kwargs(self, generator, eta):
625
625
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
626
626
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
627
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
627
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
628
628
  # and should be between [0, 1]
629
629
 
630
630
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -859,7 +859,7 @@ class StableDiffusionXLAdapterPipeline(
859
859
  return self._guidance_scale
860
860
 
861
861
  # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
862
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
862
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
863
863
  # corresponds to doing no classifier free guidance.
864
864
  @property
865
865
  def do_classifier_free_guidance(self):
@@ -948,11 +948,11 @@ class StableDiffusionXLAdapterPipeline(
948
948
  "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
949
949
  Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
950
950
  guidance_scale (`float`, *optional*, defaults to 5.0):
951
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
952
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
953
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
954
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
955
- usually at the expense of lower image quality.
951
+ Guidance scale as defined in [Classifier-Free Diffusion
952
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
953
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
954
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
955
+ the text `prompt`, usually at the expense of lower image quality.
956
956
  negative_prompt (`str` or `List[str]`, *optional*):
957
957
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
958
958
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -963,8 +963,8 @@ class StableDiffusionXLAdapterPipeline(
963
963
  num_images_per_prompt (`int`, *optional*, defaults to 1):
964
964
  The number of images to generate per prompt.
965
965
  eta (`float`, *optional*, defaults to 0.0):
966
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
967
- [`schedulers.DDIMScheduler`], will be ignored for others.
966
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
967
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
968
968
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
969
969
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
970
970
  to make generation deterministic.
@@ -1010,9 +1010,10 @@ class StableDiffusionXLAdapterPipeline(
1010
1010
  [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
1011
1011
  guidance_rescale (`float`, *optional*, defaults to 0.0):
1012
1012
  Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
1013
- Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
1014
- [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
1015
- Guidance rescale factor should fix overexposure when using zero terminal SNR.
1013
+ Flawed](https://huggingface.co/papers/2305.08891). `guidance_rescale` is defined as `φ` in equation 16. of
1014
+ [Common Diffusion Noise Schedules and Sample Steps are
1015
+ Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
1016
+ using zero terminal SNR.
1016
1017
  original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
1017
1018
  If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
1018
1019
  `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
@@ -1266,7 +1267,7 @@ class StableDiffusionXLAdapterPipeline(
1266
1267
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
1267
1268
 
1268
1269
  if self.do_classifier_free_guidance and guidance_rescale > 0.0:
1269
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1270
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1270
1271
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
1271
1272
 
1272
1273
  # compute the previous noisy sample x_t -> x_t-1