diffusers 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (478) hide show
  1. diffusers/__init__.py +48 -1
  2. diffusers/commands/__init__.py +1 -1
  3. diffusers/commands/diffusers_cli.py +1 -1
  4. diffusers/commands/env.py +1 -1
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/dependency_versions_check.py +1 -1
  7. diffusers/dependency_versions_table.py +1 -1
  8. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  9. diffusers/hooks/faster_cache.py +2 -2
  10. diffusers/hooks/group_offloading.py +128 -29
  11. diffusers/hooks/hooks.py +2 -2
  12. diffusers/hooks/layerwise_casting.py +3 -3
  13. diffusers/hooks/pyramid_attention_broadcast.py +1 -1
  14. diffusers/image_processor.py +7 -2
  15. diffusers/loaders/__init__.py +4 -0
  16. diffusers/loaders/ip_adapter.py +5 -14
  17. diffusers/loaders/lora_base.py +212 -111
  18. diffusers/loaders/lora_conversion_utils.py +275 -34
  19. diffusers/loaders/lora_pipeline.py +1554 -819
  20. diffusers/loaders/peft.py +52 -109
  21. diffusers/loaders/single_file.py +2 -2
  22. diffusers/loaders/single_file_model.py +20 -4
  23. diffusers/loaders/single_file_utils.py +225 -5
  24. diffusers/loaders/textual_inversion.py +3 -2
  25. diffusers/loaders/transformer_flux.py +1 -1
  26. diffusers/loaders/transformer_sd3.py +2 -2
  27. diffusers/loaders/unet.py +2 -16
  28. diffusers/loaders/unet_loader_utils.py +1 -1
  29. diffusers/loaders/utils.py +1 -1
  30. diffusers/models/__init__.py +15 -1
  31. diffusers/models/activations.py +5 -5
  32. diffusers/models/adapter.py +2 -3
  33. diffusers/models/attention.py +4 -4
  34. diffusers/models/attention_flax.py +10 -10
  35. diffusers/models/attention_processor.py +14 -10
  36. diffusers/models/auto_model.py +47 -10
  37. diffusers/models/autoencoders/__init__.py +1 -0
  38. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  39. diffusers/models/autoencoders/autoencoder_dc.py +3 -3
  40. diffusers/models/autoencoders/autoencoder_kl.py +4 -4
  41. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  42. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  43. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
  44. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  45. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  46. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  47. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  48. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  49. diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
  50. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  52. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  53. diffusers/models/autoencoders/vae.py +13 -2
  54. diffusers/models/autoencoders/vq_model.py +2 -2
  55. diffusers/models/cache_utils.py +1 -1
  56. diffusers/models/controlnet.py +1 -1
  57. diffusers/models/controlnet_flux.py +1 -1
  58. diffusers/models/controlnet_sd3.py +1 -1
  59. diffusers/models/controlnet_sparsectrl.py +1 -1
  60. diffusers/models/controlnets/__init__.py +1 -0
  61. diffusers/models/controlnets/controlnet.py +3 -3
  62. diffusers/models/controlnets/controlnet_flax.py +1 -1
  63. diffusers/models/controlnets/controlnet_flux.py +16 -15
  64. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  65. diffusers/models/controlnets/controlnet_sana.py +290 -0
  66. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  67. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  68. diffusers/models/controlnets/controlnet_union.py +1 -1
  69. diffusers/models/controlnets/controlnet_xs.py +7 -7
  70. diffusers/models/controlnets/multicontrolnet.py +4 -5
  71. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  72. diffusers/models/downsampling.py +2 -2
  73. diffusers/models/embeddings.py +10 -12
  74. diffusers/models/embeddings_flax.py +2 -2
  75. diffusers/models/lora.py +3 -3
  76. diffusers/models/modeling_utils.py +44 -14
  77. diffusers/models/normalization.py +4 -4
  78. diffusers/models/resnet.py +2 -2
  79. diffusers/models/resnet_flax.py +1 -1
  80. diffusers/models/transformers/__init__.py +5 -0
  81. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  82. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  83. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  84. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  85. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  86. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  87. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  88. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  89. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  90. diffusers/models/transformers/prior_transformer.py +1 -1
  91. diffusers/models/transformers/sana_transformer.py +8 -3
  92. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  93. diffusers/models/transformers/t5_film_transformer.py +3 -3
  94. diffusers/models/transformers/transformer_2d.py +1 -1
  95. diffusers/models/transformers/transformer_allegro.py +1 -1
  96. diffusers/models/transformers/transformer_chroma.py +742 -0
  97. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  98. diffusers/models/transformers/transformer_cogview4.py +317 -25
  99. diffusers/models/transformers/transformer_cosmos.py +579 -0
  100. diffusers/models/transformers/transformer_flux.py +9 -11
  101. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  102. diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
  103. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  104. diffusers/models/transformers/transformer_ltx.py +2 -2
  105. diffusers/models/transformers/transformer_lumina2.py +1 -1
  106. diffusers/models/transformers/transformer_mochi.py +1 -1
  107. diffusers/models/transformers/transformer_omnigen.py +2 -2
  108. diffusers/models/transformers/transformer_sd3.py +7 -7
  109. diffusers/models/transformers/transformer_temporal.py +1 -1
  110. diffusers/models/transformers/transformer_wan.py +24 -8
  111. diffusers/models/transformers/transformer_wan_vace.py +393 -0
  112. diffusers/models/unets/unet_1d.py +1 -1
  113. diffusers/models/unets/unet_1d_blocks.py +1 -1
  114. diffusers/models/unets/unet_2d.py +1 -1
  115. diffusers/models/unets/unet_2d_blocks.py +1 -1
  116. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  117. diffusers/models/unets/unet_2d_condition.py +2 -2
  118. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  119. diffusers/models/unets/unet_3d_blocks.py +1 -1
  120. diffusers/models/unets/unet_3d_condition.py +3 -3
  121. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  122. diffusers/models/unets/unet_kandinsky3.py +1 -1
  123. diffusers/models/unets/unet_motion_model.py +2 -2
  124. diffusers/models/unets/unet_stable_cascade.py +1 -1
  125. diffusers/models/upsampling.py +2 -2
  126. diffusers/models/vae_flax.py +2 -2
  127. diffusers/models/vq_model.py +1 -1
  128. diffusers/pipelines/__init__.py +37 -6
  129. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  130. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  131. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  132. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  133. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  134. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  135. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  136. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  137. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  138. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  139. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  140. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  141. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
  142. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  143. diffusers/pipelines/auto_pipeline.py +6 -7
  144. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  145. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  146. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  147. diffusers/pipelines/chroma/__init__.py +49 -0
  148. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  149. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  150. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  151. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
  152. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
  153. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
  154. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
  155. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  156. diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
  157. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  158. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  159. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  160. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  161. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  162. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
  163. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  164. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  165. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  166. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  167. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  168. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
  169. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
  170. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
  171. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  172. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  173. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  174. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  175. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  176. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  177. diffusers/pipelines/cosmos/__init__.py +54 -0
  178. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  179. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  180. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  181. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  182. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  183. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  184. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  185. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  186. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  187. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  188. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  189. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  190. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  191. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  192. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  193. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  194. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  195. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  196. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  197. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  198. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  199. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  200. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  201. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  202. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  203. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  204. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
  205. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  206. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  207. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  208. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  209. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  210. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  211. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  212. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  213. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  214. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  215. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  216. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  217. diffusers/pipelines/dit/pipeline_dit.py +1 -1
  218. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  219. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  220. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  221. diffusers/pipelines/flux/modeling_flux.py +1 -1
  222. diffusers/pipelines/flux/pipeline_flux.py +10 -17
  223. diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
  224. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
  225. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
  226. diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
  227. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
  228. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
  229. diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
  230. diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
  231. diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
  232. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  233. diffusers/pipelines/free_init_utils.py +2 -2
  234. diffusers/pipelines/free_noise_utils.py +3 -3
  235. diffusers/pipelines/hidream_image/__init__.py +47 -0
  236. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  237. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  238. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  239. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  240. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
  241. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  242. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  243. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  244. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  245. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  246. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  247. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  248. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  249. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  250. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  251. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  252. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  253. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  254. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  255. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  256. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  257. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  258. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  259. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  260. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  261. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  262. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  263. diffusers/pipelines/kolors/text_encoder.py +3 -3
  264. diffusers/pipelines/kolors/tokenizer.py +1 -1
  265. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  266. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  267. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  268. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  269. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  270. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  271. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  272. diffusers/pipelines/ltx/__init__.py +4 -0
  273. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  274. diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
  275. diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
  276. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
  277. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  278. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  279. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  280. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  281. diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
  282. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  283. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  284. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  285. diffusers/pipelines/onnx_utils.py +15 -2
  286. diffusers/pipelines/pag/pag_utils.py +2 -2
  287. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  288. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  289. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  290. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  291. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  292. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  293. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  294. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  295. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  296. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  297. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  298. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  299. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  300. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  301. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  302. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  303. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  304. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  305. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  306. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  307. diffusers/pipelines/pipeline_flax_utils.py +3 -4
  308. diffusers/pipelines/pipeline_loading_utils.py +89 -13
  309. diffusers/pipelines/pipeline_utils.py +105 -33
  310. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
  311. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
  312. diffusers/pipelines/sana/__init__.py +4 -0
  313. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  314. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  315. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  316. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  317. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  318. diffusers/pipelines/shap_e/camera.py +1 -1
  319. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  320. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  321. diffusers/pipelines/shap_e/renderer.py +3 -3
  322. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  323. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  324. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  325. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  326. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  327. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  328. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  329. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  330. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  331. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  332. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  333. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
  334. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  335. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
  336. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
  337. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
  338. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  339. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  340. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  341. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  342. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  343. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  344. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  345. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  346. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  347. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  348. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  349. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  350. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
  351. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  352. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  353. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  354. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  355. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  356. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  357. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  358. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  359. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  360. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  361. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  362. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  363. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  364. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  365. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  366. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  367. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  368. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  369. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  370. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  371. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  372. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  373. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  374. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  375. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  376. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  377. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  378. diffusers/pipelines/unclip/text_proj.py +2 -2
  379. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  380. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  381. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  382. diffusers/pipelines/visualcloze/__init__.py +52 -0
  383. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  384. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  385. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  386. diffusers/pipelines/wan/__init__.py +2 -0
  387. diffusers/pipelines/wan/pipeline_wan.py +13 -10
  388. diffusers/pipelines/wan/pipeline_wan_i2v.py +38 -18
  389. diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
  390. diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
  391. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  392. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  393. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  394. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  395. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  396. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  397. diffusers/quantizers/__init__.py +179 -1
  398. diffusers/quantizers/base.py +6 -1
  399. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  400. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  401. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  402. diffusers/quantizers/gguf/utils.py +16 -13
  403. diffusers/quantizers/quantization_config.py +18 -16
  404. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  405. diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
  406. diffusers/schedulers/__init__.py +3 -1
  407. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  408. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  409. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  410. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  411. diffusers/schedulers/scheduling_ddim.py +8 -8
  412. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  413. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  414. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  415. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  416. diffusers/schedulers/scheduling_ddpm.py +9 -9
  417. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  418. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  419. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  420. diffusers/schedulers/scheduling_deis_multistep.py +8 -8
  421. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  422. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
  423. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  424. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  425. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  426. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
  427. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  428. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  429. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  430. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  431. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  432. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  433. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  434. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  435. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  436. diffusers/schedulers/scheduling_ipndm.py +2 -2
  437. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  438. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  439. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  440. diffusers/schedulers/scheduling_lcm.py +3 -3
  441. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  442. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  443. diffusers/schedulers/scheduling_pndm.py +4 -4
  444. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  445. diffusers/schedulers/scheduling_repaint.py +9 -9
  446. diffusers/schedulers/scheduling_sasolver.py +15 -15
  447. diffusers/schedulers/scheduling_scm.py +1 -1
  448. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  449. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  450. diffusers/schedulers/scheduling_tcd.py +3 -3
  451. diffusers/schedulers/scheduling_unclip.py +5 -5
  452. diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
  453. diffusers/schedulers/scheduling_utils.py +1 -1
  454. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  455. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  456. diffusers/training_utils.py +13 -5
  457. diffusers/utils/__init__.py +5 -0
  458. diffusers/utils/accelerate_utils.py +1 -1
  459. diffusers/utils/doc_utils.py +1 -1
  460. diffusers/utils/dummy_pt_objects.py +120 -0
  461. diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
  462. diffusers/utils/dynamic_modules_utils.py +21 -3
  463. diffusers/utils/export_utils.py +1 -1
  464. diffusers/utils/import_utils.py +81 -18
  465. diffusers/utils/logging.py +1 -1
  466. diffusers/utils/outputs.py +2 -1
  467. diffusers/utils/peft_utils.py +91 -8
  468. diffusers/utils/state_dict_utils.py +20 -3
  469. diffusers/utils/testing_utils.py +59 -7
  470. diffusers/utils/torch_utils.py +25 -5
  471. diffusers/video_processor.py +2 -2
  472. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/METADATA +70 -55
  473. diffusers-0.34.0.dist-info/RECORD +639 -0
  474. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/WHEEL +1 -1
  475. diffusers-0.33.1.dist-info/RECORD +0 -608
  476. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
  477. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
  478. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2024 OmniGen team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 OmniGen team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -23,12 +23,14 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor
23
23
  from ...models.autoencoders import AutoencoderKL
24
24
  from ...models.transformers import OmniGenTransformer2DModel
25
25
  from ...schedulers import FlowMatchEulerDiscreteScheduler
26
- from ...utils import is_torch_xla_available, logging, replace_example_docstring
26
+ from ...utils import is_torch_xla_available, is_torchvision_available, logging, replace_example_docstring
27
27
  from ...utils.torch_utils import randn_tensor
28
28
  from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
29
- from .processor_omnigen import OmniGenMultiModalProcessor
30
29
 
31
30
 
31
+ if is_torchvision_available():
32
+ from .processor_omnigen import OmniGenMultiModalProcessor
33
+
32
34
  if is_torch_xla_available():
33
35
  XLA_AVAILABLE = True
34
36
  else:
@@ -120,7 +122,7 @@ class OmniGenPipeline(
120
122
  r"""
121
123
  The OmniGen pipeline for multimodal-to-image generation.
122
124
 
123
- Reference: https://arxiv.org/pdf/2409.11340
125
+ Reference: https://huggingface.co/papers/2409.11340
124
126
 
125
127
  Args:
126
128
  transformer ([`OmniGenTransformer2DModel`]):
@@ -176,7 +178,7 @@ class OmniGenPipeline(
176
178
  get the continuous embedding of input images by VAE
177
179
 
178
180
  Args:
179
- input_pixel_values: normlized pixel of input images
181
+ input_pixel_values: normalized pixel of input images
180
182
  device:
181
183
  Returns: torch.Tensor
182
184
  """
@@ -346,13 +348,13 @@ class OmniGenPipeline(
346
348
  in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
347
349
  passed will be used. Must be in descending order.
348
350
  guidance_scale (`float`, *optional*, defaults to 2.5):
349
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
350
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
351
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
352
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
353
- usually at the expense of lower image quality.
351
+ Guidance scale as defined in [Classifier-Free Diffusion
352
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
353
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
354
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
355
+ the text `prompt`, usually at the expense of lower image quality.
354
356
  img_guidance_scale (`float`, *optional*, defaults to 1.6):
355
- Defined as equation 3 in [Instrucpix2pix](https://arxiv.org/pdf/2211.09800).
357
+ Defined as equation 3 in [InstructPix2Pix](https://huggingface.co/papers/2211.09800).
356
358
  use_input_image_size_as_output (bool, defaults to False):
357
359
  whether to use the input image size as the output image size, which can be used for single-image input,
358
360
  e.g., image editing task
@@ -1,4 +1,4 @@
1
- # Copyright 2024 OmniGen team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 OmniGen team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -18,7 +18,12 @@ from typing import Dict, List
18
18
  import numpy as np
19
19
  import torch
20
20
  from PIL import Image
21
- from torchvision import transforms
21
+
22
+ from ...utils import is_torchvision_available
23
+
24
+
25
+ if is_torchvision_available():
26
+ from torchvision import transforms
22
27
 
23
28
 
24
29
  def crop_image(pil_image, max_image_size):
@@ -198,7 +203,7 @@ class OmniGenCollator:
198
203
  def create_mask(self, attention_mask, num_tokens_for_output_images):
199
204
  """
200
205
  OmniGen applies causal attention to each element in the sequence, but applies bidirectional attention within
201
- each image sequence References: [OmniGen](https://arxiv.org/pdf/2409.11340)
206
+ each image sequence. References: [OmniGen](https://huggingface.co/papers/2409.11340)
202
207
  """
203
208
  extended_mask = []
204
209
  padding_images = []
@@ -75,6 +75,11 @@ class OnnxRuntimeModel:
75
75
  logger.info("No onnxruntime provider specified, using CPUExecutionProvider")
76
76
  provider = "CPUExecutionProvider"
77
77
 
78
+ if provider_options is None:
79
+ provider_options = []
80
+ elif not isinstance(provider_options, list):
81
+ provider_options = [provider_options]
82
+
78
83
  return ort.InferenceSession(
79
84
  path, providers=[provider], sess_options=sess_options, provider_options=provider_options
80
85
  )
@@ -174,7 +179,10 @@ class OnnxRuntimeModel:
174
179
  # load model from local directory
175
180
  if os.path.isdir(model_id):
176
181
  model = OnnxRuntimeModel.load_model(
177
- Path(model_id, model_file_name).as_posix(), provider=provider, sess_options=sess_options
182
+ Path(model_id, model_file_name).as_posix(),
183
+ provider=provider,
184
+ sess_options=sess_options,
185
+ provider_options=kwargs.pop("provider_options"),
178
186
  )
179
187
  kwargs["model_save_dir"] = Path(model_id)
180
188
  # load model from hub
@@ -190,7 +198,12 @@ class OnnxRuntimeModel:
190
198
  )
191
199
  kwargs["model_save_dir"] = Path(model_cache_path).parent
192
200
  kwargs["latest_model_name"] = Path(model_cache_path).name
193
- model = OnnxRuntimeModel.load_model(model_cache_path, provider=provider, sess_options=sess_options)
201
+ model = OnnxRuntimeModel.load_model(
202
+ model_cache_path,
203
+ provider=provider,
204
+ sess_options=sess_options,
205
+ provider_options=kwargs.pop("provider_options"),
206
+ )
194
207
  return cls(model=model, **kwargs)
195
208
 
196
209
  @classmethod
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -31,7 +31,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
31
31
 
32
32
 
33
33
  class PAGMixin:
34
- r"""Mixin class for [Pertubed Attention Guidance](https://arxiv.org/abs/2403.17377v1)."""
34
+ r"""Mixin class for [Perturbed Attention Guidance](https://huggingface.co/papers/2403.17377v1)."""
35
35
 
36
36
  def _set_pag_attn_processor(self, pag_applied_layers, do_classifier_free_guidance):
37
37
  r"""
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -36,7 +36,7 @@ from ...utils import (
36
36
  scale_lora_layers,
37
37
  unscale_lora_layers,
38
38
  )
39
- from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
39
+ from ...utils.torch_utils import empty_device_cache, is_compiled_module, is_torch_version, randn_tensor
40
40
  from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
41
41
  from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
42
42
  from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -541,7 +541,7 @@ class StableDiffusionControlNetPAGPipeline(
541
541
  def prepare_extra_step_kwargs(self, generator, eta):
542
542
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
543
543
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
544
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
544
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
545
545
  # and should be between [0, 1]
546
546
 
547
547
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -843,7 +843,7 @@ class StableDiffusionControlNetPAGPipeline(
843
843
  return self._clip_skip
844
844
 
845
845
  # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
846
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
846
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
847
847
  # corresponds to doing no classifier free guidance.
848
848
  @property
849
849
  def do_classifier_free_guidance(self):
@@ -933,8 +933,8 @@ class StableDiffusionControlNetPAGPipeline(
933
933
  num_images_per_prompt (`int`, *optional*, defaults to 1):
934
934
  The number of images to generate per prompt.
935
935
  eta (`float`, *optional*, defaults to 0.0):
936
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
937
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
936
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
937
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
938
938
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
939
939
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
940
940
  generation deterministic.
@@ -1228,7 +1228,11 @@ class StableDiffusionControlNetPAGPipeline(
1228
1228
  for i, t in enumerate(timesteps):
1229
1229
  # Relevant thread:
1230
1230
  # https://dev-discuss.pytorch.org/t/cudagraphs-in-pytorch-2-0/1428
1231
- if (is_unet_compiled and is_controlnet_compiled) and is_torch_higher_equal_2_1:
1231
+ if (
1232
+ torch.cuda.is_available()
1233
+ and (is_unet_compiled and is_controlnet_compiled)
1234
+ and is_torch_higher_equal_2_1
1235
+ ):
1232
1236
  torch._inductor.cudagraph_mark_step_begin()
1233
1237
  # expand the latents if we are doing classifier free guidance
1234
1238
  latent_model_input = torch.cat([latents] * (prompt_embeds.shape[0] // latents.shape[0]))
@@ -1309,7 +1313,7 @@ class StableDiffusionControlNetPAGPipeline(
1309
1313
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
1310
1314
  self.unet.to("cpu")
1311
1315
  self.controlnet.to("cpu")
1312
- torch.cuda.empty_cache()
1316
+ empty_device_cache()
1313
1317
 
1314
1318
  if not output_type == "latent":
1315
1319
  image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -37,7 +37,7 @@ from ...utils import (
37
37
  scale_lora_layers,
38
38
  unscale_lora_layers,
39
39
  )
40
- from ...utils.torch_utils import is_compiled_module, randn_tensor
40
+ from ...utils.torch_utils import empty_device_cache, is_compiled_module, randn_tensor
41
41
  from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
42
42
  from ..stable_diffusion import StableDiffusionPipelineOutput
43
43
  from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -520,7 +520,7 @@ class StableDiffusionControlNetPAGInpaintPipeline(
520
520
  def prepare_extra_step_kwargs(self, generator, eta):
521
521
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
522
522
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
523
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
523
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
524
524
  # and should be between [0, 1]
525
525
 
526
526
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -955,7 +955,7 @@ class StableDiffusionControlNetPAGInpaintPipeline(
955
955
  return self._clip_skip
956
956
 
957
957
  # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
958
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
958
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
959
959
  # corresponds to doing no classifier free guidance.
960
960
  @property
961
961
  def do_classifier_free_guidance(self):
@@ -1064,8 +1064,8 @@ class StableDiffusionControlNetPAGInpaintPipeline(
1064
1064
  num_images_per_prompt (`int`, *optional*, defaults to 1):
1065
1065
  The number of images to generate per prompt.
1066
1066
  eta (`float`, *optional*, defaults to 0.0):
1067
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
1068
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
1067
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
1068
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
1069
1069
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
1070
1070
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
1071
1071
  generation deterministic.
@@ -1521,7 +1521,7 @@ class StableDiffusionControlNetPAGInpaintPipeline(
1521
1521
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
1522
1522
  self.unet.to("cpu")
1523
1523
  self.controlnet.to("cpu")
1524
- torch.cuda.empty_cache()
1524
+ empty_device_cache()
1525
1525
 
1526
1526
  if not output_type == "latent":
1527
1527
  image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -619,7 +619,7 @@ class StableDiffusionXLControlNetPAGPipeline(
619
619
  def prepare_extra_step_kwargs(self, generator, eta):
620
620
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
621
621
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
622
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
622
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
623
623
  # and should be between [0, 1]
624
624
 
625
625
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -992,7 +992,7 @@ class StableDiffusionXLControlNetPAGPipeline(
992
992
  return self._clip_skip
993
993
 
994
994
  # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
995
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
995
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
996
996
  # corresponds to doing no classifier free guidance.
997
997
  @property
998
998
  def do_classifier_free_guidance(self):
@@ -1111,8 +1111,8 @@ class StableDiffusionXLControlNetPAGPipeline(
1111
1111
  num_images_per_prompt (`int`, *optional*, defaults to 1):
1112
1112
  The number of images to generate per prompt.
1113
1113
  eta (`float`, *optional*, defaults to 0.0):
1114
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
1115
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
1114
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
1115
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
1116
1116
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
1117
1117
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
1118
1118
  generation deterministic.
@@ -1498,7 +1498,11 @@ class StableDiffusionXLControlNetPAGPipeline(
1498
1498
  for i, t in enumerate(timesteps):
1499
1499
  # Relevant thread:
1500
1500
  # https://dev-discuss.pytorch.org/t/cudagraphs-in-pytorch-2-0/1428
1501
- if (is_unet_compiled and is_controlnet_compiled) and is_torch_higher_equal_2_1:
1501
+ if (
1502
+ torch.cuda.is_available()
1503
+ and (is_unet_compiled and is_controlnet_compiled)
1504
+ and is_torch_higher_equal_2_1
1505
+ ):
1502
1506
  torch._inductor.cudagraph_mark_step_begin()
1503
1507
  # expand the latents if we are doing classifier free guidance
1504
1508
  latent_model_input = torch.cat([latents] * (prompt_embeds.shape[0] // latents.shape[0]))
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -52,7 +52,7 @@ from ...utils import (
52
52
  scale_lora_layers,
53
53
  unscale_lora_layers,
54
54
  )
55
- from ...utils.torch_utils import is_compiled_module, randn_tensor
55
+ from ...utils.torch_utils import empty_device_cache, is_compiled_module, randn_tensor
56
56
  from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
57
57
  from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
58
58
  from .pag_utils import PAGMixin
@@ -115,7 +115,7 @@ EXAMPLE_DOC_STRING = """
115
115
  ... with torch.no_grad(), torch.autocast("cuda"):
116
116
  ... depth_map = depth_estimator(image).predicted_depth
117
117
 
118
- ... depth_map = torch.nn.fuctional.interpolate(
118
+ ... depth_map = torch.nn.functional.interpolate(
119
119
  ... depth_map.unsqueeze(1),
120
120
  ... size=(1024, 1024),
121
121
  ... mode="bicubic",
@@ -611,7 +611,7 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
611
611
  def prepare_extra_step_kwargs(self, generator, eta):
612
612
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
613
613
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
614
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
614
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
615
615
  # and should be between [0, 1]
616
616
 
617
617
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -926,7 +926,7 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
926
926
  # Offload text encoder if `enable_model_cpu_offload` was enabled
927
927
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
928
928
  self.text_encoder_2.to("cpu")
929
- torch.cuda.empty_cache()
929
+ empty_device_cache()
930
930
 
931
931
  image = image.to(device=device, dtype=dtype)
932
932
 
@@ -1074,7 +1074,7 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
1074
1074
  return self._clip_skip
1075
1075
 
1076
1076
  # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
1077
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
1077
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
1078
1078
  # corresponds to doing no classifier free guidance.
1079
1079
  @property
1080
1080
  def do_classifier_free_guidance(self):
@@ -1176,11 +1176,11 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
1176
1176
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
1177
1177
  expense of slower inference.
1178
1178
  guidance_scale (`float`, *optional*, defaults to 7.5):
1179
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
1180
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
1181
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
1182
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
1183
- usually at the expense of lower image quality.
1179
+ Guidance scale as defined in [Classifier-Free Diffusion
1180
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
1181
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
1182
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
1183
+ the text `prompt`, usually at the expense of lower image quality.
1184
1184
  negative_prompt (`str` or `List[str]`, *optional*):
1185
1185
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
1186
1186
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -1191,8 +1191,8 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
1191
1191
  num_images_per_prompt (`int`, *optional*, defaults to 1):
1192
1192
  The number of images to generate per prompt.
1193
1193
  eta (`float`, *optional*, defaults to 0.0):
1194
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
1195
- [`schedulers.DDIMScheduler`], will be ignored for others.
1194
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
1195
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
1196
1196
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
1197
1197
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
1198
1198
  to make generation deterministic.
@@ -1648,7 +1648,7 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
1648
1648
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
1649
1649
  self.unet.to("cpu")
1650
1650
  self.controlnet.to("cpu")
1651
- torch.cuda.empty_cache()
1651
+ empty_device_cache()
1652
1652
 
1653
1653
  if not output_type == "latent":
1654
1654
  # make sure the VAE is in float32 mode, as it overflows in float16
@@ -1,4 +1,4 @@
1
- # Copyright 2024 HunyuanDiT Authors and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 HunyuanDiT Authors and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -131,7 +131,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
131
131
  r"""
132
132
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
133
133
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
134
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
134
+ Flawed](https://huggingface.co/papers/2305.08891).
135
135
 
136
136
  Args:
137
137
  noise_cfg (`torch.Tensor`):
@@ -443,7 +443,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
443
443
  def prepare_extra_step_kwargs(self, generator, eta):
444
444
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
445
445
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
446
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
446
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
447
447
  # and should be between [0, 1]
448
448
 
449
449
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -566,7 +566,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
566
566
  return self._guidance_rescale
567
567
 
568
568
  # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
569
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
569
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
570
570
  # corresponds to doing no classifier free guidance.
571
571
  @property
572
572
  def do_classifier_free_guidance(self):
@@ -638,8 +638,8 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
638
638
  num_images_per_prompt (`int`, *optional*, defaults to 1):
639
639
  The number of images to generate per prompt.
640
640
  eta (`float`, *optional*, defaults to 0.0):
641
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
642
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
641
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
642
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
643
643
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
644
644
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
645
645
  generation deterministic.
@@ -675,7 +675,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
675
675
  inputs will be passed.
676
676
  guidance_rescale (`float`, *optional*, defaults to 0.0):
677
677
  Rescale the noise_cfg according to `guidance_rescale`. Based on findings of [Common Diffusion Noise
678
- Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
678
+ Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891). See Section 3.4
679
679
  original_size (`Tuple[int, int]`, *optional*, defaults to `(1024, 1024)`):
680
680
  The original size of the image. Used to calculate the time ids.
681
681
  target_size (`Tuple[int, int]`, *optional*):
@@ -915,7 +915,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
915
915
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
916
916
 
917
917
  if self.do_classifier_free_guidance and guidance_rescale > 0.0:
918
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
918
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
919
919
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
920
920
 
921
921
  # compute the previous noisy sample x_t -> x_t-1
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Stability AI, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Stability AI, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -453,7 +453,7 @@ class KolorsPAGPipeline(
453
453
  def prepare_extra_step_kwargs(self, generator, eta):
454
454
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
455
455
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
456
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
456
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
457
457
  # and should be between [0, 1]
458
458
 
459
459
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -651,7 +651,7 @@ class KolorsPAGPipeline(
651
651
  return self._guidance_scale
652
652
 
653
653
  # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
654
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
654
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
655
655
  # corresponds to doing no classifier free guidance.
656
656
  @property
657
657
  def do_classifier_free_guidance(self):
@@ -749,11 +749,11 @@ class KolorsPAGPipeline(
749
749
  "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
750
750
  Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
751
751
  guidance_scale (`float`, *optional*, defaults to 5.0):
752
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
753
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
754
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
755
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
756
- usually at the expense of lower image quality.
752
+ Guidance scale as defined in [Classifier-Free Diffusion
753
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
754
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
755
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
756
+ the text `prompt`, usually at the expense of lower image quality.
757
757
  negative_prompt (`str` or `List[str]`, *optional*):
758
758
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
759
759
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -761,8 +761,8 @@ class KolorsPAGPipeline(
761
761
  num_images_per_prompt (`int`, *optional*, defaults to 1):
762
762
  The number of images to generate per prompt.
763
763
  eta (`float`, *optional*, defaults to 0.0):
764
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
765
- [`schedulers.DDIMScheduler`], will be ignored for others.
764
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
765
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
766
766
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
767
767
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
768
768
  to make generation deterministic.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 PixArt-Sigma Authors and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 PixArt-Sigma Authors and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -326,7 +326,7 @@ class PixArtSigmaPAGPipeline(DiffusionPipeline, PAGMixin):
326
326
  def prepare_extra_step_kwargs(self, generator, eta):
327
327
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
328
328
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
329
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
329
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
330
330
  # and should be between [0, 1]
331
331
 
332
332
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -488,7 +488,7 @@ class PixArtSigmaPAGPipeline(DiffusionPipeline, PAGMixin):
488
488
  # &amp
489
489
  caption = re.sub(r"&amp", "", caption)
490
490
 
491
- # ip adresses:
491
+ # ip addresses:
492
492
  caption = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", " ", caption)
493
493
 
494
494
  # article ids:
@@ -624,11 +624,11 @@ class PixArtSigmaPAGPipeline(DiffusionPipeline, PAGMixin):
624
624
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
625
625
  will be used.
626
626
  guidance_scale (`float`, *optional*, defaults to 4.5):
627
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
628
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
629
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
630
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
631
- usually at the expense of lower image quality.
627
+ Guidance scale as defined in [Classifier-Free Diffusion
628
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
629
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
630
+ `guidance_scale > 1`. A higher guidance scale encourages the model to generate images that are closely linked to
631
+ the text `prompt`, usually at the expense of lower image quality.
632
632
  num_images_per_prompt (`int`, *optional*, defaults to 1):
633
633
  The number of images to generate per prompt.
634
634
  height (`int`, *optional*, defaults to self.unet.config.sample_size):
@@ -636,8 +636,8 @@ class PixArtSigmaPAGPipeline(DiffusionPipeline, PAGMixin):
636
636
  width (`int`, *optional*, defaults to self.unet.config.sample_size):
637
637
  The width in pixels of the generated image.
638
638
  eta (`float`, *optional*, defaults to 0.0):
639
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
640
- [`schedulers.DDIMScheduler`], will be ignored for others.
639
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
640
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
641
641
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
642
642
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
643
643
  to make generation deterministic.
@@ -729,7 +729,7 @@ class PixArtSigmaPAGPipeline(DiffusionPipeline, PAGMixin):
729
729
  device = self._execution_device
730
730
 
731
731
  # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
732
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
732
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
733
733
  # corresponds to doing no classifier free guidance.
734
734
  do_classifier_free_guidance = guidance_scale > 1.0
735
735