diffusers 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (478) hide show
  1. diffusers/__init__.py +48 -1
  2. diffusers/commands/__init__.py +1 -1
  3. diffusers/commands/diffusers_cli.py +1 -1
  4. diffusers/commands/env.py +1 -1
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/dependency_versions_check.py +1 -1
  7. diffusers/dependency_versions_table.py +1 -1
  8. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  9. diffusers/hooks/faster_cache.py +2 -2
  10. diffusers/hooks/group_offloading.py +128 -29
  11. diffusers/hooks/hooks.py +2 -2
  12. diffusers/hooks/layerwise_casting.py +3 -3
  13. diffusers/hooks/pyramid_attention_broadcast.py +1 -1
  14. diffusers/image_processor.py +7 -2
  15. diffusers/loaders/__init__.py +4 -0
  16. diffusers/loaders/ip_adapter.py +5 -14
  17. diffusers/loaders/lora_base.py +212 -111
  18. diffusers/loaders/lora_conversion_utils.py +275 -34
  19. diffusers/loaders/lora_pipeline.py +1554 -819
  20. diffusers/loaders/peft.py +52 -109
  21. diffusers/loaders/single_file.py +2 -2
  22. diffusers/loaders/single_file_model.py +20 -4
  23. diffusers/loaders/single_file_utils.py +225 -5
  24. diffusers/loaders/textual_inversion.py +3 -2
  25. diffusers/loaders/transformer_flux.py +1 -1
  26. diffusers/loaders/transformer_sd3.py +2 -2
  27. diffusers/loaders/unet.py +2 -16
  28. diffusers/loaders/unet_loader_utils.py +1 -1
  29. diffusers/loaders/utils.py +1 -1
  30. diffusers/models/__init__.py +15 -1
  31. diffusers/models/activations.py +5 -5
  32. diffusers/models/adapter.py +2 -3
  33. diffusers/models/attention.py +4 -4
  34. diffusers/models/attention_flax.py +10 -10
  35. diffusers/models/attention_processor.py +14 -10
  36. diffusers/models/auto_model.py +47 -10
  37. diffusers/models/autoencoders/__init__.py +1 -0
  38. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  39. diffusers/models/autoencoders/autoencoder_dc.py +3 -3
  40. diffusers/models/autoencoders/autoencoder_kl.py +4 -4
  41. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  42. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  43. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
  44. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  45. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  46. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  47. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  48. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  49. diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
  50. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  52. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  53. diffusers/models/autoencoders/vae.py +13 -2
  54. diffusers/models/autoencoders/vq_model.py +2 -2
  55. diffusers/models/cache_utils.py +1 -1
  56. diffusers/models/controlnet.py +1 -1
  57. diffusers/models/controlnet_flux.py +1 -1
  58. diffusers/models/controlnet_sd3.py +1 -1
  59. diffusers/models/controlnet_sparsectrl.py +1 -1
  60. diffusers/models/controlnets/__init__.py +1 -0
  61. diffusers/models/controlnets/controlnet.py +3 -3
  62. diffusers/models/controlnets/controlnet_flax.py +1 -1
  63. diffusers/models/controlnets/controlnet_flux.py +16 -15
  64. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  65. diffusers/models/controlnets/controlnet_sana.py +290 -0
  66. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  67. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  68. diffusers/models/controlnets/controlnet_union.py +1 -1
  69. diffusers/models/controlnets/controlnet_xs.py +7 -7
  70. diffusers/models/controlnets/multicontrolnet.py +4 -5
  71. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  72. diffusers/models/downsampling.py +2 -2
  73. diffusers/models/embeddings.py +10 -12
  74. diffusers/models/embeddings_flax.py +2 -2
  75. diffusers/models/lora.py +3 -3
  76. diffusers/models/modeling_utils.py +44 -14
  77. diffusers/models/normalization.py +4 -4
  78. diffusers/models/resnet.py +2 -2
  79. diffusers/models/resnet_flax.py +1 -1
  80. diffusers/models/transformers/__init__.py +5 -0
  81. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  82. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  83. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  84. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  85. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  86. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  87. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  88. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  89. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  90. diffusers/models/transformers/prior_transformer.py +1 -1
  91. diffusers/models/transformers/sana_transformer.py +8 -3
  92. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  93. diffusers/models/transformers/t5_film_transformer.py +3 -3
  94. diffusers/models/transformers/transformer_2d.py +1 -1
  95. diffusers/models/transformers/transformer_allegro.py +1 -1
  96. diffusers/models/transformers/transformer_chroma.py +742 -0
  97. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  98. diffusers/models/transformers/transformer_cogview4.py +317 -25
  99. diffusers/models/transformers/transformer_cosmos.py +579 -0
  100. diffusers/models/transformers/transformer_flux.py +9 -11
  101. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  102. diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
  103. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  104. diffusers/models/transformers/transformer_ltx.py +2 -2
  105. diffusers/models/transformers/transformer_lumina2.py +1 -1
  106. diffusers/models/transformers/transformer_mochi.py +1 -1
  107. diffusers/models/transformers/transformer_omnigen.py +2 -2
  108. diffusers/models/transformers/transformer_sd3.py +7 -7
  109. diffusers/models/transformers/transformer_temporal.py +1 -1
  110. diffusers/models/transformers/transformer_wan.py +24 -8
  111. diffusers/models/transformers/transformer_wan_vace.py +393 -0
  112. diffusers/models/unets/unet_1d.py +1 -1
  113. diffusers/models/unets/unet_1d_blocks.py +1 -1
  114. diffusers/models/unets/unet_2d.py +1 -1
  115. diffusers/models/unets/unet_2d_blocks.py +1 -1
  116. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  117. diffusers/models/unets/unet_2d_condition.py +2 -2
  118. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  119. diffusers/models/unets/unet_3d_blocks.py +1 -1
  120. diffusers/models/unets/unet_3d_condition.py +3 -3
  121. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  122. diffusers/models/unets/unet_kandinsky3.py +1 -1
  123. diffusers/models/unets/unet_motion_model.py +2 -2
  124. diffusers/models/unets/unet_stable_cascade.py +1 -1
  125. diffusers/models/upsampling.py +2 -2
  126. diffusers/models/vae_flax.py +2 -2
  127. diffusers/models/vq_model.py +1 -1
  128. diffusers/pipelines/__init__.py +37 -6
  129. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  130. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  131. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  132. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  133. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  134. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  135. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  136. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  137. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  138. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  139. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  140. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  141. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
  142. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  143. diffusers/pipelines/auto_pipeline.py +6 -7
  144. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  145. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  146. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  147. diffusers/pipelines/chroma/__init__.py +49 -0
  148. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  149. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  150. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  151. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
  152. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
  153. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
  154. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
  155. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  156. diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
  157. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  158. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  159. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  160. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  161. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  162. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
  163. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  164. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  165. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  166. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  167. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  168. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
  169. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
  170. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
  171. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  172. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  173. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  174. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  175. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  176. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  177. diffusers/pipelines/cosmos/__init__.py +54 -0
  178. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  179. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  180. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  181. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  182. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  183. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  184. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  185. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  186. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  187. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  188. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  189. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  190. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  191. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  192. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  193. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  194. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  195. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  196. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  197. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  198. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  199. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  200. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  201. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  202. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  203. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  204. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
  205. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  206. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  207. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  208. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  209. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  210. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  211. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  212. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  213. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  214. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  215. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  216. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  217. diffusers/pipelines/dit/pipeline_dit.py +1 -1
  218. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  219. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  220. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  221. diffusers/pipelines/flux/modeling_flux.py +1 -1
  222. diffusers/pipelines/flux/pipeline_flux.py +10 -17
  223. diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
  224. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
  225. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
  226. diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
  227. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
  228. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
  229. diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
  230. diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
  231. diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
  232. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  233. diffusers/pipelines/free_init_utils.py +2 -2
  234. diffusers/pipelines/free_noise_utils.py +3 -3
  235. diffusers/pipelines/hidream_image/__init__.py +47 -0
  236. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  237. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  238. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  239. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  240. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
  241. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  242. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  243. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  244. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  245. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  246. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  247. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  248. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  249. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  250. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  251. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  252. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  253. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  254. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  255. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  256. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  257. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  258. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  259. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  260. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  261. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  262. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  263. diffusers/pipelines/kolors/text_encoder.py +3 -3
  264. diffusers/pipelines/kolors/tokenizer.py +1 -1
  265. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  266. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  267. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  268. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  269. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  270. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  271. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  272. diffusers/pipelines/ltx/__init__.py +4 -0
  273. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  274. diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
  275. diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
  276. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
  277. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  278. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  279. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  280. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  281. diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
  282. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  283. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  284. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  285. diffusers/pipelines/onnx_utils.py +15 -2
  286. diffusers/pipelines/pag/pag_utils.py +2 -2
  287. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  288. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  289. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  290. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  291. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  292. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  293. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  294. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  295. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  296. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  297. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  298. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  299. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  300. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  301. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  302. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  303. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  304. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  305. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  306. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  307. diffusers/pipelines/pipeline_flax_utils.py +3 -4
  308. diffusers/pipelines/pipeline_loading_utils.py +89 -13
  309. diffusers/pipelines/pipeline_utils.py +105 -33
  310. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
  311. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
  312. diffusers/pipelines/sana/__init__.py +4 -0
  313. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  314. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  315. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  316. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  317. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  318. diffusers/pipelines/shap_e/camera.py +1 -1
  319. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  320. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  321. diffusers/pipelines/shap_e/renderer.py +3 -3
  322. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  323. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  324. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  325. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  326. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  327. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  328. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  329. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  330. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  331. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  332. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  333. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
  334. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  335. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
  336. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
  337. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
  338. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  339. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  340. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  341. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  342. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  343. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  344. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  345. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  346. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  347. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  348. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  349. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  350. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
  351. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  352. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  353. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  354. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  355. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  356. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  357. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  358. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  359. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  360. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  361. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  362. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  363. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  364. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  365. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  366. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  367. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  368. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  369. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  370. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  371. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  372. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  373. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  374. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  375. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  376. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  377. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  378. diffusers/pipelines/unclip/text_proj.py +2 -2
  379. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  380. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  381. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  382. diffusers/pipelines/visualcloze/__init__.py +52 -0
  383. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  384. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  385. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  386. diffusers/pipelines/wan/__init__.py +2 -0
  387. diffusers/pipelines/wan/pipeline_wan.py +17 -12
  388. diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
  389. diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
  390. diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
  391. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  392. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  393. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  394. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  395. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  396. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  397. diffusers/quantizers/__init__.py +179 -1
  398. diffusers/quantizers/base.py +6 -1
  399. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  400. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  401. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  402. diffusers/quantizers/gguf/utils.py +16 -13
  403. diffusers/quantizers/quantization_config.py +18 -16
  404. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  405. diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
  406. diffusers/schedulers/__init__.py +3 -1
  407. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  408. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  409. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  410. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  411. diffusers/schedulers/scheduling_ddim.py +8 -8
  412. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  413. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  414. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  415. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  416. diffusers/schedulers/scheduling_ddpm.py +9 -9
  417. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  418. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  419. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  420. diffusers/schedulers/scheduling_deis_multistep.py +8 -8
  421. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  422. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
  423. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  424. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  425. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  426. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
  427. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  428. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  429. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  430. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  431. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  432. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  433. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  434. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  435. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  436. diffusers/schedulers/scheduling_ipndm.py +2 -2
  437. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  438. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  439. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  440. diffusers/schedulers/scheduling_lcm.py +3 -3
  441. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  442. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  443. diffusers/schedulers/scheduling_pndm.py +4 -4
  444. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  445. diffusers/schedulers/scheduling_repaint.py +9 -9
  446. diffusers/schedulers/scheduling_sasolver.py +15 -15
  447. diffusers/schedulers/scheduling_scm.py +1 -1
  448. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  449. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  450. diffusers/schedulers/scheduling_tcd.py +3 -3
  451. diffusers/schedulers/scheduling_unclip.py +5 -5
  452. diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
  453. diffusers/schedulers/scheduling_utils.py +1 -1
  454. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  455. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  456. diffusers/training_utils.py +13 -5
  457. diffusers/utils/__init__.py +5 -0
  458. diffusers/utils/accelerate_utils.py +1 -1
  459. diffusers/utils/doc_utils.py +1 -1
  460. diffusers/utils/dummy_pt_objects.py +120 -0
  461. diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
  462. diffusers/utils/dynamic_modules_utils.py +21 -3
  463. diffusers/utils/export_utils.py +1 -1
  464. diffusers/utils/import_utils.py +81 -18
  465. diffusers/utils/logging.py +1 -1
  466. diffusers/utils/outputs.py +2 -1
  467. diffusers/utils/peft_utils.py +91 -8
  468. diffusers/utils/state_dict_utils.py +20 -3
  469. diffusers/utils/testing_utils.py +59 -7
  470. diffusers/utils/torch_utils.py +25 -5
  471. diffusers/video_processor.py +2 -2
  472. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
  473. diffusers-0.34.0.dist-info/RECORD +639 -0
  474. diffusers-0.33.0.dist-info/RECORD +0 -608
  475. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
  476. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
  477. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
  478. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,12 @@
1
1
  import inspect
2
2
  from typing import Callable, Dict, List, Optional, Union
3
3
 
4
- import numpy as np
5
4
  import PIL
6
5
  import PIL.Image
7
6
  import torch
8
7
  from transformers import T5EncoderModel, T5Tokenizer
9
8
 
9
+ from ...image_processor import VaeImageProcessor
10
10
  from ...loaders import StableDiffusionLoraLoaderMixin
11
11
  from ...models import Kandinsky3UNet, VQModel
12
12
  from ...schedulers import DDPMScheduler
@@ -53,24 +53,6 @@ EXAMPLE_DOC_STRING = """
53
53
  """
54
54
 
55
55
 
56
- def downscale_height_and_width(height, width, scale_factor=8):
57
- new_height = height // scale_factor**2
58
- if height % scale_factor**2 != 0:
59
- new_height += 1
60
- new_width = width // scale_factor**2
61
- if width % scale_factor**2 != 0:
62
- new_width += 1
63
- return new_height * scale_factor, new_width * scale_factor
64
-
65
-
66
- def prepare_image(pil_image):
67
- arr = np.array(pil_image.convert("RGB"))
68
- arr = arr.astype(np.float32) / 127.5 - 1
69
- arr = np.transpose(arr, [2, 0, 1])
70
- image = torch.from_numpy(arr).unsqueeze(0)
71
- return image
72
-
73
-
74
56
  class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
75
57
  model_cpu_offload_seq = "text_encoder->movq->unet->movq"
76
58
  _callback_tensor_inputs = [
@@ -94,6 +76,14 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixi
94
76
  self.register_modules(
95
77
  tokenizer=tokenizer, text_encoder=text_encoder, unet=unet, scheduler=scheduler, movq=movq
96
78
  )
79
+ movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1) if getattr(self, "movq", None) else 8
80
+ movq_latent_channels = self.movq.config.latent_channels if getattr(self, "movq", None) else 4
81
+ self.image_processor = VaeImageProcessor(
82
+ vae_scale_factor=movq_scale_factor,
83
+ vae_latent_channels=movq_latent_channels,
84
+ resample="bicubic",
85
+ reducing_gap=1,
86
+ )
97
87
 
98
88
  def get_timesteps(self, num_inference_steps, strength, device):
99
89
  # get the original timestep using init_timestep
@@ -309,7 +299,7 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixi
309
299
  def prepare_extra_step_kwargs(self, generator, eta):
310
300
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
311
301
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
312
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
302
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
313
303
  # and should be between [0, 1]
314
304
 
315
305
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -449,11 +439,11 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixi
449
439
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
450
440
  expense of slower inference.
451
441
  guidance_scale (`float`, *optional*, defaults to 3.0):
452
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
453
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
454
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
455
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
456
- usually at the expense of lower image quality.
442
+ Guidance scale as defined in [Classifier-Free Diffusion
443
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
444
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
445
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
446
+ the text `prompt`, usually at the expense of lower image quality.
457
447
  negative_prompt (`str` or `List[str]`, *optional*):
458
448
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
459
449
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -566,7 +556,7 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixi
566
556
  f"Input is in incorrect format: {[type(i) for i in image]}. Currently, we only support PIL image and pytorch tensor"
567
557
  )
568
558
 
569
- image = torch.cat([prepare_image(i) for i in image], dim=0)
559
+ image = torch.cat([self.image_processor.preprocess(i) for i in image], dim=0)
570
560
  image = image.to(dtype=prompt_embeds.dtype, device=device)
571
561
  # 4. Prepare timesteps
572
562
  self.scheduler.set_timesteps(num_inference_steps, device=device)
@@ -630,20 +620,9 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixi
630
620
  xm.mark_step()
631
621
 
632
622
  # post-processing
633
- if output_type not in ["pt", "np", "pil", "latent"]:
634
- raise ValueError(
635
- f"Only the output types `pt`, `pil`, `np` and `latent` are supported not output_type={output_type}"
636
- )
637
623
  if not output_type == "latent":
638
624
  image = self.movq.decode(latents, force_not_quantize=True)["sample"]
639
-
640
- if output_type in ["np", "pil"]:
641
- image = image * 0.5 + 0.5
642
- image = image.clamp(0, 1)
643
- image = image.cpu().permute(0, 2, 3, 1).float().numpy()
644
-
645
- if output_type == "pil":
646
- image = self.numpy_to_pil(image)
625
+ image = self.image_processor.postprocess(image, output_type)
647
626
  else:
648
627
  image = latents
649
628
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Stability AI, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Stability AI, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -436,7 +436,7 @@ class KolorsPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffusionLor
436
436
  def prepare_extra_step_kwargs(self, generator, eta):
437
437
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
438
438
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
439
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
439
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
440
440
  # and should be between [0, 1]
441
441
 
442
442
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -633,7 +633,7 @@ class KolorsPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffusionLor
633
633
  return self._guidance_scale
634
634
 
635
635
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
636
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
636
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
637
637
  # corresponds to doing no classifier free guidance.
638
638
  @property
639
639
  def do_classifier_free_guidance(self):
@@ -729,11 +729,11 @@ class KolorsPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffusionLor
729
729
  "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
730
730
  Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
731
731
  guidance_scale (`float`, *optional*, defaults to 5.0):
732
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
733
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
734
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
735
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
736
- usually at the expense of lower image quality.
732
+ Guidance scale as defined in [Classifier-Free Diffusion
733
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
734
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
735
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
736
+ the text `prompt`, usually at the expense of lower image quality.
737
737
  negative_prompt (`str` or `List[str]`, *optional*):
738
738
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
739
739
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -741,8 +741,8 @@ class KolorsPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffusionLor
741
741
  num_images_per_prompt (`int`, *optional*, defaults to 1):
742
742
  The number of images to generate per prompt.
743
743
  eta (`float`, *optional*, defaults to 0.0):
744
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
745
- [`schedulers.DDIMScheduler`], will be ignored for others.
744
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
745
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
746
746
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
747
747
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
748
748
  to make generation deterministic.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Stability AI, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Stability AI, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -25,7 +25,7 @@ from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
25
25
  from ...models.attention_processor import AttnProcessor2_0, FusedAttnProcessor2_0, XFormersAttnProcessor
26
26
  from ...schedulers import KarrasDiffusionSchedulers
27
27
  from ...utils import is_torch_xla_available, logging, replace_example_docstring
28
- from ...utils.torch_utils import randn_tensor
28
+ from ...utils.torch_utils import empty_device_cache, randn_tensor
29
29
  from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
30
30
  from .pipeline_output import KolorsPipelineOutput
31
31
  from .text_encoder import ChatGLMModel
@@ -456,7 +456,7 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
456
456
  def prepare_extra_step_kwargs(self, generator, eta):
457
457
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
458
458
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
459
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
459
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
460
460
  # and should be between [0, 1]
461
461
 
462
462
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -618,7 +618,7 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
618
618
  # Offload text encoder if `enable_model_cpu_offload` was enabled
619
619
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
620
620
  self.text_encoder_2.to("cpu")
621
- torch.cuda.empty_cache()
621
+ empty_device_cache()
622
622
 
623
623
  image = image.to(device=device, dtype=dtype)
624
624
 
@@ -761,7 +761,7 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
761
761
  return self._guidance_scale
762
762
 
763
763
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
764
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
764
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
765
765
  # corresponds to doing no classifier free guidance.
766
766
  @property
767
767
  def do_classifier_free_guidance(self):
@@ -880,11 +880,11 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
880
880
  "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
881
881
  Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
882
882
  guidance_scale (`float`, *optional*, defaults to 5.0):
883
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
884
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
885
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
886
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
887
- usually at the expense of lower image quality.
883
+ Guidance scale as defined in [Classifier-Free Diffusion
884
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
885
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
886
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
887
+ the text `prompt`, usually at the expense of lower image quality.
888
888
  negative_prompt (`str` or `List[str]`, *optional*):
889
889
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
890
890
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -892,8 +892,8 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
892
892
  num_images_per_prompt (`int`, *optional*, defaults to 1):
893
893
  The number of images to generate per prompt.
894
894
  eta (`float`, *optional*, defaults to 0.0):
895
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
896
- [`schedulers.DDIMScheduler`], will be ignored for others.
895
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
896
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
897
897
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
898
898
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
899
899
  to make generation deterministic.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 ChatGLM3-6B Model Team, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 ChatGLM3-6B Model Team, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -434,7 +434,7 @@ class MLP(torch.nn.Module):
434
434
 
435
435
  self.add_bias = config.add_bias_linear
436
436
 
437
- # Project to 4h. If using swiglu double the output width, see https://arxiv.org/pdf/2002.05202.pdf
437
+ # Project to 4h. If using swiglu double the output width, see https://huggingface.co/papers/2002.05202
438
438
  self.dense_h_to_4h = nn.Linear(
439
439
  config.hidden_size,
440
440
  config.ffn_hidden_size * 2,
@@ -668,7 +668,7 @@ class Embedding(torch.nn.Module):
668
668
  # Embeddings.
669
669
  words_embeddings = self.word_embeddings(input_ids)
670
670
  embeddings = words_embeddings
671
- # Data format change to avoid explicit tranposes : [b s h] --> [s b h].
671
+ # Data format change to avoid explicit transposes : [b s h] --> [s b h].
672
672
  embeddings = embeddings.transpose(0, 1).contiguous()
673
673
  # If the input flag for fp32 residual connection is set, convert for float.
674
674
  if self.fp32_residual_connection:
@@ -1,4 +1,4 @@
1
- # Copyright 2024 ChatGLM3-6B Model Team, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 ChatGLM3-6B Model Team, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Stanford University Team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -607,7 +607,7 @@ class LatentConsistencyModelImg2ImgPipeline(
607
607
  def prepare_extra_step_kwargs(self, generator, eta):
608
608
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
609
609
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
610
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
610
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
611
611
  # and should be between [0, 1]
612
612
 
613
613
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Stanford University Team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -548,7 +548,7 @@ class LatentConsistencyModelPipeline(
548
548
  def prepare_extra_step_kwargs(self, generator, eta):
549
549
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
550
550
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
551
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
551
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
552
552
  # and should be between [0, 1]
553
553
 
554
554
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -95,8 +95,8 @@ class LDMSuperResolutionPipeline(DiffusionPipeline):
95
95
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
96
96
  expense of slower inference.
97
97
  eta (`float`, *optional*, defaults to 0.0):
98
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
99
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
98
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
99
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
100
100
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
101
101
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
102
102
  generation deterministic.
@@ -166,7 +166,7 @@ class LDMSuperResolutionPipeline(DiffusionPipeline):
166
166
 
167
167
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature.
168
168
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
169
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
169
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
170
170
  # and should be between [0, 1]
171
171
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
172
172
  extra_kwargs = {}
@@ -1,4 +1,4 @@
1
- # Copyright 2024 the Latte Team and The HuggingFace Team.
1
+ # Copyright 2025 the Latte Team and The HuggingFace Team.
2
2
  # All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -356,7 +356,7 @@ class LattePipeline(DiffusionPipeline):
356
356
  def prepare_extra_step_kwargs(self, generator, eta):
357
357
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
358
358
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
359
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
359
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
360
360
  # and should be between [0, 1]
361
361
 
362
362
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -501,7 +501,7 @@ class LattePipeline(DiffusionPipeline):
501
501
  # &amp
502
502
  caption = re.sub(r"&amp", "", caption)
503
503
 
504
- # ip adresses:
504
+ # ip addresses:
505
505
  caption = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", " ", caption)
506
506
 
507
507
  # article ids:
@@ -592,7 +592,7 @@ class LattePipeline(DiffusionPipeline):
592
592
  return self._guidance_scale
593
593
 
594
594
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
595
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
595
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
596
596
  # corresponds to doing no classifier free guidance.
597
597
  @property
598
598
  def do_classifier_free_guidance(self):
@@ -657,11 +657,11 @@ class LattePipeline(DiffusionPipeline):
657
657
  Custom timesteps to use for the denoising process. If not defined, equal spaced `num_inference_steps`
658
658
  timesteps are used. Must be in descending order.
659
659
  guidance_scale (`float`, *optional*, defaults to 7.0):
660
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
661
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
662
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
663
- 1`. Higher guidance scale encourages to generate videos that are closely linked to the text `prompt`,
664
- usually at the expense of lower video quality.
660
+ Guidance scale as defined in [Classifier-Free Diffusion
661
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
662
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
663
+ `guidance_scale > 1`. Higher guidance scale encourages to generate videos that are closely linked to
664
+ the text `prompt`, usually at the expense of lower video quality.
665
665
  video_length (`int`, *optional*, defaults to 16):
666
666
  The number of video frames that are generated. Defaults to 16 frames which at 8 frames per seconds
667
667
  num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -671,8 +671,8 @@ class LattePipeline(DiffusionPipeline):
671
671
  width (`int`, *optional*, defaults to self.unet.config.sample_size):
672
672
  The width in pixels of the generated video.
673
673
  eta (`float`, *optional*, defaults to 0.0):
674
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
675
- [`schedulers.DDIMScheduler`], will be ignored for others.
674
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
675
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
676
676
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
677
677
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
678
678
  to make generation deterministic.
@@ -747,7 +747,7 @@ class LattePipeline(DiffusionPipeline):
747
747
  device = self._execution_device
748
748
 
749
749
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
750
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
750
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
751
751
  # corresponds to doing no classifier free guidance.
752
752
  do_classifier_free_guidance = guidance_scale > 1.0
753
753
 
@@ -244,7 +244,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
244
244
  r"""
245
245
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
246
246
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
247
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
247
+ Flawed](https://huggingface.co/papers/2305.08891).
248
248
 
249
249
  Args:
250
250
  noise_cfg (`torch.Tensor`):
@@ -439,7 +439,7 @@ class LEditsPPPipelineStableDiffusion(
439
439
  def prepare_extra_step_kwargs(self, eta, generator=None):
440
440
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
441
441
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
442
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
442
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
443
443
  # and should be between [0, 1]
444
444
 
445
445
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -808,7 +808,7 @@ class LEditsPPPipelineStableDiffusion(
808
808
  edit_guidance_scale (`float` or `List[float]`, *optional*, defaults to 5):
809
809
  Guidance scale for guiding the image generation. If provided as list values should correspond to
810
810
  `editing_prompt`. `edit_guidance_scale` is defined as `s_e` of equation 12 of [LEDITS++
811
- Paper](https://arxiv.org/abs/2301.12247).
811
+ Paper](https://huggingface.co/papers/2301.12247).
812
812
  edit_warmup_steps (`float` or `List[float]`, *optional*, defaults to 10):
813
813
  Number of diffusion steps (for each prompt) for which guidance will not be applied.
814
814
  edit_cooldown_steps (`float` or `List[float]`, *optional*, defaults to `None`):
@@ -816,7 +816,7 @@ class LEditsPPPipelineStableDiffusion(
816
816
  edit_threshold (`float` or `List[float]`, *optional*, defaults to 0.9):
817
817
  Masking threshold of guidance. Threshold should be proportional to the image region that is modified.
818
818
  'edit_threshold' is defined as 'λ' of equation 12 of [LEDITS++
819
- Paper](https://arxiv.org/abs/2301.12247).
819
+ Paper](https://huggingface.co/papers/2301.12247).
820
820
  user_mask (`torch.Tensor`, *optional*):
821
821
  User-provided mask for even better control over the editing process. This is helpful when LEDITS++'s
822
822
  implicit masks do not meet user preferences.
@@ -826,11 +826,11 @@ class LEditsPPPipelineStableDiffusion(
826
826
  use_cross_attn_mask (`bool`, defaults to `False`):
827
827
  Whether cross-attention masks are used. Cross-attention masks are always used when use_intersect_mask
828
828
  is set to true. Cross-attention masks are defined as 'M^1' of equation 12 of [LEDITS++
829
- paper](https://arxiv.org/pdf/2311.16711.pdf).
829
+ paper](https://huggingface.co/papers/2311.16711).
830
830
  use_intersect_mask (`bool`, defaults to `True`):
831
831
  Whether the masking term is calculated as intersection of cross-attention masks and masks derived from
832
832
  the noise estimate. Cross-attention mask are defined as 'M^1' and masks derived from the noise estimate
833
- are defined as 'M^2' of equation 12 of [LEDITS++ paper](https://arxiv.org/pdf/2311.16711.pdf).
833
+ are defined as 'M^2' of equation 12 of [LEDITS++ paper](https://huggingface.co/papers/2311.16711).
834
834
  attn_store_steps (`List[int]`, *optional*):
835
835
  Steps for which the attention maps are stored in the AttentionStore. Just for visualization purposes.
836
836
  store_averaged_over_steps (`bool`, defaults to `True`):
@@ -841,7 +841,7 @@ class LEditsPPPipelineStableDiffusion(
841
841
  [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
842
842
  guidance_rescale (`float`, *optional*, defaults to 0.0):
843
843
  Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
844
- Flawed](https://arxiv.org/pdf/2305.08891.pdf). Guidance rescale factor should fix overexposure when
844
+ Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
845
845
  using zero terminal SNR.
846
846
  clip_skip (`int`, *optional*):
847
847
  Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
@@ -1191,7 +1191,7 @@ class LEditsPPPipelineStableDiffusion(
1191
1191
  noise_pred = noise_pred_uncond + noise_guidance_edit
1192
1192
 
1193
1193
  if enable_edit_guidance and self.guidance_rescale > 0.0:
1194
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1194
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1195
1195
  noise_pred = rescale_noise_cfg(
1196
1196
  noise_pred,
1197
1197
  noise_pred_edit_concepts.mean(dim=0, keepdim=False),
@@ -1268,8 +1268,8 @@ class LEditsPPPipelineStableDiffusion(
1268
1268
  ):
1269
1269
  r"""
1270
1270
  The function to the pipeline for image inversion as described by the [LEDITS++
1271
- Paper](https://arxiv.org/abs/2301.12247). If the scheduler is set to [`~schedulers.DDIMScheduler`] the
1272
- inversion proposed by [edit-friendly DPDM](https://arxiv.org/abs/2304.06140) will be performed instead.
1271
+ Paper](https://huggingface.co/papers/2301.12247). If the scheduler is set to [`~schedulers.DDIMScheduler`] the
1272
+ inversion proposed by [edit-friendly DPDM](https://huggingface.co/papers/2304.06140) will be performed instead.
1273
1273
 
1274
1274
  Args:
1275
1275
  image (`PipelineImageInput`):
@@ -1443,7 +1443,7 @@ def compute_noise_ddim(scheduler, prev_latents, latents, timestep, noise_pred, e
1443
1443
  beta_prod_t = 1 - alpha_prod_t
1444
1444
 
1445
1445
  # 3. compute predicted original sample from predicted noise also called
1446
- # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
1446
+ # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
1447
1447
  pred_original_sample = (latents - beta_prod_t ** (0.5) * noise_pred) / alpha_prod_t ** (0.5)
1448
1448
 
1449
1449
  # 4. Clip "predicted x_0"
@@ -1455,10 +1455,10 @@ def compute_noise_ddim(scheduler, prev_latents, latents, timestep, noise_pred, e
1455
1455
  variance = scheduler._get_variance(timestep, prev_timestep)
1456
1456
  std_dev_t = eta * variance ** (0.5)
1457
1457
 
1458
- # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
1458
+ # 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
1459
1459
  pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * noise_pred
1460
1460
 
1461
- # modifed so that updated xtm1 is returned as well (to avoid error accumulation)
1461
+ # modified so that updated xtm1 is returned as well (to avoid error accumulation)
1462
1462
  mu_xt = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
1463
1463
  if variance > 0.0:
1464
1464
  noise = (prev_latents - mu_xt) / (variance ** (0.5) * eta)
@@ -622,7 +622,7 @@ class LEditsPPPipelineStableDiffusionXL(
622
622
  def prepare_extra_step_kwargs(self, eta, generator=None):
623
623
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
624
624
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
625
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
625
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
626
626
  # and should be between [0, 1]
627
627
 
628
628
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -747,7 +747,7 @@ class LEditsPPPipelineStableDiffusionXL(
747
747
  return self._clip_skip
748
748
 
749
749
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
750
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
750
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
751
751
  # corresponds to doing no classifier free guidance.
752
752
  @property
753
753
  def do_classifier_free_guidance(self):
@@ -901,9 +901,10 @@ class LEditsPPPipelineStableDiffusionXL(
901
901
  [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
902
902
  guidance_rescale (`float`, *optional*, defaults to 0.7):
903
903
  Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
904
- Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
905
- [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
906
- Guidance rescale factor should fix overexposure when using zero terminal SNR.
904
+ Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
905
+ [Common Diffusion Noise Schedules and Sample Steps are
906
+ Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
907
+ using zero terminal SNR.
907
908
  crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
908
909
  `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
909
910
  `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
@@ -929,7 +930,7 @@ class LEditsPPPipelineStableDiffusionXL(
929
930
  edit_guidance_scale (`float` or `List[float]`, *optional*, defaults to 5):
930
931
  Guidance scale for guiding the image generation. If provided as list values should correspond to
931
932
  `editing_prompt`. `edit_guidance_scale` is defined as `s_e` of equation 12 of [LEDITS++
932
- Paper](https://arxiv.org/abs/2301.12247).
933
+ Paper](https://huggingface.co/papers/2301.12247).
933
934
  edit_warmup_steps (`float` or `List[float]`, *optional*, defaults to 10):
934
935
  Number of diffusion steps (for each prompt) for which guidance is not applied.
935
936
  edit_cooldown_steps (`float` or `List[float]`, *optional*, defaults to `None`):
@@ -937,18 +938,18 @@ class LEditsPPPipelineStableDiffusionXL(
937
938
  edit_threshold (`float` or `List[float]`, *optional*, defaults to 0.9):
938
939
  Masking threshold of guidance. Threshold should be proportional to the image region that is modified.
939
940
  'edit_threshold' is defined as 'λ' of equation 12 of [LEDITS++
940
- Paper](https://arxiv.org/abs/2301.12247).
941
+ Paper](https://huggingface.co/papers/2301.12247).
941
942
  sem_guidance (`List[torch.Tensor]`, *optional*):
942
943
  List of pre-generated guidance vectors to be applied at generation. Length of the list has to
943
944
  correspond to `num_inference_steps`.
944
945
  use_cross_attn_mask:
945
946
  Whether cross-attention masks are used. Cross-attention masks are always used when use_intersect_mask
946
947
  is set to true. Cross-attention masks are defined as 'M^1' of equation 12 of [LEDITS++
947
- paper](https://arxiv.org/pdf/2311.16711.pdf).
948
+ paper](https://huggingface.co/papers/2311.16711).
948
949
  use_intersect_mask:
949
950
  Whether the masking term is calculated as intersection of cross-attention masks and masks derived from
950
951
  the noise estimate. Cross-attention mask are defined as 'M^1' and masks derived from the noise estimate
951
- are defined as 'M^2' of equation 12 of [LEDITS++ paper](https://arxiv.org/pdf/2311.16711.pdf).
952
+ are defined as 'M^2' of equation 12 of [LEDITS++ paper](https://huggingface.co/papers/2311.16711).
952
953
  user_mask:
953
954
  User-provided mask for even better control over the editing process. This is helpful when LEDITS++'s
954
955
  implicit masks do not meet user preferences.
@@ -1350,7 +1351,7 @@ class LEditsPPPipelineStableDiffusionXL(
1350
1351
 
1351
1352
  # compute the previous noisy sample x_t -> x_t-1
1352
1353
  if enable_edit_guidance and self.guidance_rescale > 0.0:
1353
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1354
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1354
1355
  noise_pred = rescale_noise_cfg(
1355
1356
  noise_pred,
1356
1357
  noise_pred_edit_concepts.mean(dim=0, keepdim=False),
@@ -1478,8 +1479,8 @@ class LEditsPPPipelineStableDiffusionXL(
1478
1479
  ):
1479
1480
  r"""
1480
1481
  The function to the pipeline for image inversion as described by the [LEDITS++
1481
- Paper](https://arxiv.org/abs/2301.12247). If the scheduler is set to [`~schedulers.DDIMScheduler`] the
1482
- inversion proposed by [edit-friendly DPDM](https://arxiv.org/abs/2304.06140) will be performed instead.
1482
+ Paper](https://huggingface.co/papers/2301.12247). If the scheduler is set to [`~schedulers.DDIMScheduler`] the
1483
+ inversion proposed by [edit-friendly DPDM](https://huggingface.co/papers/2304.06140) will be performed instead.
1483
1484
 
1484
1485
  Args:
1485
1486
  image (`PipelineImageInput`):
@@ -1691,7 +1692,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
1691
1692
  r"""
1692
1693
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
1693
1694
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
1694
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
1695
+ Flawed](https://huggingface.co/papers/2305.08891).
1695
1696
 
1696
1697
  Args:
1697
1698
  noise_cfg (`torch.Tensor`):
@@ -1727,7 +1728,7 @@ def compute_noise_ddim(scheduler, prev_latents, latents, timestep, noise_pred, e
1727
1728
  beta_prod_t = 1 - alpha_prod_t
1728
1729
 
1729
1730
  # 3. compute predicted original sample from predicted noise also called
1730
- # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
1731
+ # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
1731
1732
  pred_original_sample = (latents - beta_prod_t ** (0.5) * noise_pred) / alpha_prod_t ** (0.5)
1732
1733
 
1733
1734
  # 4. Clip "predicted x_0"
@@ -1739,10 +1740,10 @@ def compute_noise_ddim(scheduler, prev_latents, latents, timestep, noise_pred, e
1739
1740
  variance = scheduler._get_variance(timestep, prev_timestep)
1740
1741
  std_dev_t = eta * variance ** (0.5)
1741
1742
 
1742
- # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
1743
+ # 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
1743
1744
  pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * noise_pred
1744
1745
 
1745
- # modifed so that updated xtm1 is returned as well (to avoid error accumulation)
1746
+ # modified so that updated xtm1 is returned as well (to avoid error accumulation)
1746
1747
  mu_xt = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
1747
1748
  if variance > 0.0:
1748
1749
  noise = (prev_latents - mu_xt) / (variance ** (0.5) * eta)