diffusers 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (478) hide show
  1. diffusers/__init__.py +48 -1
  2. diffusers/commands/__init__.py +1 -1
  3. diffusers/commands/diffusers_cli.py +1 -1
  4. diffusers/commands/env.py +1 -1
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/dependency_versions_check.py +1 -1
  7. diffusers/dependency_versions_table.py +1 -1
  8. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  9. diffusers/hooks/faster_cache.py +2 -2
  10. diffusers/hooks/group_offloading.py +128 -29
  11. diffusers/hooks/hooks.py +2 -2
  12. diffusers/hooks/layerwise_casting.py +3 -3
  13. diffusers/hooks/pyramid_attention_broadcast.py +1 -1
  14. diffusers/image_processor.py +7 -2
  15. diffusers/loaders/__init__.py +4 -0
  16. diffusers/loaders/ip_adapter.py +5 -14
  17. diffusers/loaders/lora_base.py +212 -111
  18. diffusers/loaders/lora_conversion_utils.py +275 -34
  19. diffusers/loaders/lora_pipeline.py +1554 -819
  20. diffusers/loaders/peft.py +52 -109
  21. diffusers/loaders/single_file.py +2 -2
  22. diffusers/loaders/single_file_model.py +20 -4
  23. diffusers/loaders/single_file_utils.py +225 -5
  24. diffusers/loaders/textual_inversion.py +3 -2
  25. diffusers/loaders/transformer_flux.py +1 -1
  26. diffusers/loaders/transformer_sd3.py +2 -2
  27. diffusers/loaders/unet.py +2 -16
  28. diffusers/loaders/unet_loader_utils.py +1 -1
  29. diffusers/loaders/utils.py +1 -1
  30. diffusers/models/__init__.py +15 -1
  31. diffusers/models/activations.py +5 -5
  32. diffusers/models/adapter.py +2 -3
  33. diffusers/models/attention.py +4 -4
  34. diffusers/models/attention_flax.py +10 -10
  35. diffusers/models/attention_processor.py +14 -10
  36. diffusers/models/auto_model.py +47 -10
  37. diffusers/models/autoencoders/__init__.py +1 -0
  38. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  39. diffusers/models/autoencoders/autoencoder_dc.py +3 -3
  40. diffusers/models/autoencoders/autoencoder_kl.py +4 -4
  41. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  42. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  43. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
  44. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  45. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  46. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  47. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  48. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  49. diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
  50. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  52. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  53. diffusers/models/autoencoders/vae.py +13 -2
  54. diffusers/models/autoencoders/vq_model.py +2 -2
  55. diffusers/models/cache_utils.py +1 -1
  56. diffusers/models/controlnet.py +1 -1
  57. diffusers/models/controlnet_flux.py +1 -1
  58. diffusers/models/controlnet_sd3.py +1 -1
  59. diffusers/models/controlnet_sparsectrl.py +1 -1
  60. diffusers/models/controlnets/__init__.py +1 -0
  61. diffusers/models/controlnets/controlnet.py +3 -3
  62. diffusers/models/controlnets/controlnet_flax.py +1 -1
  63. diffusers/models/controlnets/controlnet_flux.py +16 -15
  64. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  65. diffusers/models/controlnets/controlnet_sana.py +290 -0
  66. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  67. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  68. diffusers/models/controlnets/controlnet_union.py +1 -1
  69. diffusers/models/controlnets/controlnet_xs.py +7 -7
  70. diffusers/models/controlnets/multicontrolnet.py +4 -5
  71. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  72. diffusers/models/downsampling.py +2 -2
  73. diffusers/models/embeddings.py +10 -12
  74. diffusers/models/embeddings_flax.py +2 -2
  75. diffusers/models/lora.py +3 -3
  76. diffusers/models/modeling_utils.py +44 -14
  77. diffusers/models/normalization.py +4 -4
  78. diffusers/models/resnet.py +2 -2
  79. diffusers/models/resnet_flax.py +1 -1
  80. diffusers/models/transformers/__init__.py +5 -0
  81. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  82. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  83. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  84. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  85. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  86. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  87. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  88. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  89. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  90. diffusers/models/transformers/prior_transformer.py +1 -1
  91. diffusers/models/transformers/sana_transformer.py +8 -3
  92. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  93. diffusers/models/transformers/t5_film_transformer.py +3 -3
  94. diffusers/models/transformers/transformer_2d.py +1 -1
  95. diffusers/models/transformers/transformer_allegro.py +1 -1
  96. diffusers/models/transformers/transformer_chroma.py +742 -0
  97. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  98. diffusers/models/transformers/transformer_cogview4.py +317 -25
  99. diffusers/models/transformers/transformer_cosmos.py +579 -0
  100. diffusers/models/transformers/transformer_flux.py +9 -11
  101. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  102. diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
  103. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  104. diffusers/models/transformers/transformer_ltx.py +2 -2
  105. diffusers/models/transformers/transformer_lumina2.py +1 -1
  106. diffusers/models/transformers/transformer_mochi.py +1 -1
  107. diffusers/models/transformers/transformer_omnigen.py +2 -2
  108. diffusers/models/transformers/transformer_sd3.py +7 -7
  109. diffusers/models/transformers/transformer_temporal.py +1 -1
  110. diffusers/models/transformers/transformer_wan.py +24 -8
  111. diffusers/models/transformers/transformer_wan_vace.py +393 -0
  112. diffusers/models/unets/unet_1d.py +1 -1
  113. diffusers/models/unets/unet_1d_blocks.py +1 -1
  114. diffusers/models/unets/unet_2d.py +1 -1
  115. diffusers/models/unets/unet_2d_blocks.py +1 -1
  116. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  117. diffusers/models/unets/unet_2d_condition.py +2 -2
  118. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  119. diffusers/models/unets/unet_3d_blocks.py +1 -1
  120. diffusers/models/unets/unet_3d_condition.py +3 -3
  121. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  122. diffusers/models/unets/unet_kandinsky3.py +1 -1
  123. diffusers/models/unets/unet_motion_model.py +2 -2
  124. diffusers/models/unets/unet_stable_cascade.py +1 -1
  125. diffusers/models/upsampling.py +2 -2
  126. diffusers/models/vae_flax.py +2 -2
  127. diffusers/models/vq_model.py +1 -1
  128. diffusers/pipelines/__init__.py +37 -6
  129. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  130. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  131. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  132. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  133. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  134. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  135. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  136. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  137. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  138. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  139. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  140. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  141. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
  142. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  143. diffusers/pipelines/auto_pipeline.py +6 -7
  144. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  145. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  146. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  147. diffusers/pipelines/chroma/__init__.py +49 -0
  148. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  149. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  150. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  151. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
  152. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
  153. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
  154. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
  155. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  156. diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
  157. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  158. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  159. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  160. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  161. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  162. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
  163. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  164. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  165. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  166. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  167. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  168. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
  169. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
  170. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
  171. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  172. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  173. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  174. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  175. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  176. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  177. diffusers/pipelines/cosmos/__init__.py +54 -0
  178. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  179. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  180. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  181. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  182. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  183. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  184. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  185. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  186. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  187. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  188. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  189. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  190. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  191. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  192. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  193. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  194. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  195. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  196. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  197. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  198. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  199. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  200. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  201. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  202. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  203. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  204. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
  205. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  206. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  207. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  208. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  209. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  210. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  211. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  212. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  213. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  214. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  215. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  216. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  217. diffusers/pipelines/dit/pipeline_dit.py +1 -1
  218. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  219. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  220. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  221. diffusers/pipelines/flux/modeling_flux.py +1 -1
  222. diffusers/pipelines/flux/pipeline_flux.py +10 -17
  223. diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
  224. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
  225. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
  226. diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
  227. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
  228. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
  229. diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
  230. diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
  231. diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
  232. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  233. diffusers/pipelines/free_init_utils.py +2 -2
  234. diffusers/pipelines/free_noise_utils.py +3 -3
  235. diffusers/pipelines/hidream_image/__init__.py +47 -0
  236. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  237. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  238. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  239. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  240. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
  241. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  242. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  243. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  244. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  245. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  246. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  247. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  248. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  249. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  250. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  251. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  252. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  253. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  254. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  255. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  256. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  257. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  258. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  259. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  260. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  261. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  262. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  263. diffusers/pipelines/kolors/text_encoder.py +3 -3
  264. diffusers/pipelines/kolors/tokenizer.py +1 -1
  265. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  266. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  267. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  268. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  269. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  270. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  271. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  272. diffusers/pipelines/ltx/__init__.py +4 -0
  273. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  274. diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
  275. diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
  276. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
  277. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  278. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  279. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  280. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  281. diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
  282. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  283. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  284. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  285. diffusers/pipelines/onnx_utils.py +15 -2
  286. diffusers/pipelines/pag/pag_utils.py +2 -2
  287. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  288. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  289. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  290. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  291. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  292. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  293. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  294. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  295. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  296. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  297. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  298. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  299. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  300. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  301. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  302. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  303. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  304. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  305. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  306. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  307. diffusers/pipelines/pipeline_flax_utils.py +3 -4
  308. diffusers/pipelines/pipeline_loading_utils.py +89 -13
  309. diffusers/pipelines/pipeline_utils.py +105 -33
  310. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
  311. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
  312. diffusers/pipelines/sana/__init__.py +4 -0
  313. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  314. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  315. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  316. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  317. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  318. diffusers/pipelines/shap_e/camera.py +1 -1
  319. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  320. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  321. diffusers/pipelines/shap_e/renderer.py +3 -3
  322. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  323. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  324. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  325. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  326. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  327. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  328. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  329. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  330. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  331. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  332. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  333. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
  334. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  335. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
  336. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
  337. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
  338. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  339. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  340. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  341. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  342. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  343. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  344. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  345. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  346. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  347. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  348. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  349. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  350. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
  351. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  352. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  353. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  354. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  355. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  356. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  357. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  358. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  359. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  360. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  361. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  362. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  363. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  364. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  365. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  366. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  367. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  368. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  369. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  370. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  371. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  372. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  373. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  374. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  375. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  376. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  377. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  378. diffusers/pipelines/unclip/text_proj.py +2 -2
  379. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  380. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  381. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  382. diffusers/pipelines/visualcloze/__init__.py +52 -0
  383. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  384. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  385. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  386. diffusers/pipelines/wan/__init__.py +2 -0
  387. diffusers/pipelines/wan/pipeline_wan.py +17 -12
  388. diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
  389. diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
  390. diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
  391. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  392. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  393. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  394. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  395. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  396. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  397. diffusers/quantizers/__init__.py +179 -1
  398. diffusers/quantizers/base.py +6 -1
  399. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  400. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  401. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  402. diffusers/quantizers/gguf/utils.py +16 -13
  403. diffusers/quantizers/quantization_config.py +18 -16
  404. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  405. diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
  406. diffusers/schedulers/__init__.py +3 -1
  407. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  408. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  409. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  410. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  411. diffusers/schedulers/scheduling_ddim.py +8 -8
  412. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  413. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  414. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  415. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  416. diffusers/schedulers/scheduling_ddpm.py +9 -9
  417. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  418. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  419. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  420. diffusers/schedulers/scheduling_deis_multistep.py +8 -8
  421. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  422. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
  423. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  424. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  425. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  426. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
  427. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  428. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  429. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  430. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  431. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  432. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  433. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  434. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  435. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  436. diffusers/schedulers/scheduling_ipndm.py +2 -2
  437. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  438. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  439. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  440. diffusers/schedulers/scheduling_lcm.py +3 -3
  441. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  442. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  443. diffusers/schedulers/scheduling_pndm.py +4 -4
  444. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  445. diffusers/schedulers/scheduling_repaint.py +9 -9
  446. diffusers/schedulers/scheduling_sasolver.py +15 -15
  447. diffusers/schedulers/scheduling_scm.py +1 -1
  448. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  449. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  450. diffusers/schedulers/scheduling_tcd.py +3 -3
  451. diffusers/schedulers/scheduling_unclip.py +5 -5
  452. diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
  453. diffusers/schedulers/scheduling_utils.py +1 -1
  454. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  455. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  456. diffusers/training_utils.py +13 -5
  457. diffusers/utils/__init__.py +5 -0
  458. diffusers/utils/accelerate_utils.py +1 -1
  459. diffusers/utils/doc_utils.py +1 -1
  460. diffusers/utils/dummy_pt_objects.py +120 -0
  461. diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
  462. diffusers/utils/dynamic_modules_utils.py +21 -3
  463. diffusers/utils/export_utils.py +1 -1
  464. diffusers/utils/import_utils.py +81 -18
  465. diffusers/utils/logging.py +1 -1
  466. diffusers/utils/outputs.py +2 -1
  467. diffusers/utils/peft_utils.py +91 -8
  468. diffusers/utils/state_dict_utils.py +20 -3
  469. diffusers/utils/testing_utils.py +59 -7
  470. diffusers/utils/torch_utils.py +25 -5
  471. diffusers/video_processor.py +2 -2
  472. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
  473. diffusers-0.34.0.dist-info/RECORD +639 -0
  474. diffusers-0.33.0.dist-info/RECORD +0 -608
  475. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
  476. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
  477. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
  478. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,21 @@
1
+ from dataclasses import dataclass
2
+ from typing import List, Union
3
+
4
+ import numpy as np
5
+ import PIL.Image
6
+
7
+ from ...utils import BaseOutput
8
+
9
+
10
+ @dataclass
11
+ class ChromaPipelineOutput(BaseOutput):
12
+ """
13
+ Output class for Stable Diffusion pipelines.
14
+
15
+ Args:
16
+ images (`List[PIL.Image.Image]` or `np.ndarray`)
17
+ List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
18
+ num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
19
+ """
20
+
21
+ images: Union[List[PIL.Image.Image], np.ndarray]
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
1
+ # Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
2
2
  # All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -359,7 +359,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
359
359
  def prepare_extra_step_kwargs(self, generator, eta):
360
360
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
361
361
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
362
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
362
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
363
363
  # and should be between [0, 1]
364
364
 
365
365
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -558,11 +558,11 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
558
558
  in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
559
559
  passed will be used. Must be in descending order.
560
560
  guidance_scale (`float`, *optional*, defaults to 7.0):
561
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
562
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
563
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
564
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
565
- usually at the expense of lower image quality.
561
+ Guidance scale as defined in [Classifier-Free Diffusion
562
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
563
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
564
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
565
+ the text `prompt`, usually at the expense of lower image quality.
566
566
  num_videos_per_prompt (`int`, *optional*, defaults to 1):
567
567
  The number of videos to generate per prompt.
568
568
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -645,7 +645,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
645
645
  device = self._execution_device
646
646
 
647
647
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
648
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
648
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
649
649
  # corresponds to doing no classifier free guidance.
650
650
  do_classifier_free_guidance = guidance_scale > 1.0
651
651
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The CogVideoX team, Tsinghua University & ZhipuAI, Alibaba-PAI and The HuggingFace Team.
1
+ # Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI, Alibaba-PAI and The HuggingFace Team.
2
2
  # All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -398,7 +398,7 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
398
398
  def prepare_extra_step_kwargs(self, generator, eta):
399
399
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
400
400
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
401
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
401
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
402
402
  # and should be between [0, 1]
403
403
 
404
404
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -603,11 +603,11 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
603
603
  in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
604
604
  passed will be used. Must be in descending order.
605
605
  guidance_scale (`float`, *optional*, defaults to 6.0):
606
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
607
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
608
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
609
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
610
- usually at the expense of lower image quality.
606
+ Guidance scale as defined in [Classifier-Free Diffusion
607
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
608
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
609
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
610
+ the text `prompt`, usually at the expense of lower image quality.
611
611
  num_videos_per_prompt (`int`, *optional*, defaults to 1):
612
612
  The number of videos to generate per prompt.
613
613
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -698,7 +698,7 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
698
698
  device = self._execution_device
699
699
 
700
700
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
701
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
701
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
702
702
  # corresponds to doing no classifier free guidance.
703
703
  do_classifier_free_guidance = guidance_scale > 1.0
704
704
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
1
+ # Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
2
2
  # All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -442,7 +442,7 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
442
442
  def prepare_extra_step_kwargs(self, generator, eta):
443
443
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
444
444
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
445
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
445
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
446
446
  # and should be between [0, 1]
447
447
 
448
448
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -658,11 +658,11 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
658
658
  in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
659
659
  passed will be used. Must be in descending order.
660
660
  guidance_scale (`float`, *optional*, defaults to 7.0):
661
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
662
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
663
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
664
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
665
- usually at the expense of lower image quality.
661
+ Guidance scale as defined in [Classifier-Free Diffusion
662
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
663
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
664
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
665
+ the text `prompt`, usually at the expense of lower image quality.
666
666
  num_videos_per_prompt (`int`, *optional*, defaults to 1):
667
667
  The number of videos to generate per prompt.
668
668
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -747,7 +747,7 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
747
747
  device = self._execution_device
748
748
 
749
749
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
750
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
750
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
751
751
  # corresponds to doing no classifier free guidance.
752
752
  do_classifier_free_guidance = guidance_scale > 1.0
753
753
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
1
+ # Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
2
2
  # All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -418,7 +418,7 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
418
418
  def prepare_extra_step_kwargs(self, generator, eta):
419
419
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
420
420
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
421
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
421
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
422
422
  # and should be between [0, 1]
423
423
 
424
424
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -628,11 +628,11 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
628
628
  strength (`float`, *optional*, defaults to 0.8):
629
629
  Higher strength leads to more differences between original video and generated video.
630
630
  guidance_scale (`float`, *optional*, defaults to 7.0):
631
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
632
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
633
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
634
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
635
- usually at the expense of lower image quality.
631
+ Guidance scale as defined in [Classifier-Free Diffusion
632
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
633
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
634
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
635
+ the text `prompt`, usually at the expense of lower image quality.
636
636
  num_videos_per_prompt (`int`, *optional*, defaults to 1):
637
637
  The number of videos to generate per prompt.
638
638
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -718,7 +718,7 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
718
718
  device = self._execution_device
719
719
 
720
720
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
721
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
721
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
722
722
  # corresponds to doing no classifier free guidance.
723
723
  do_classifier_free_guidance = guidance_scale > 1.0
724
724
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
1
+ # Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
2
2
  # All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -319,7 +319,7 @@ class CogView3PlusPipeline(DiffusionPipeline):
319
319
  def prepare_extra_step_kwargs(self, generator, eta):
320
320
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
321
321
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
322
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
322
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
323
323
  # and should be between [0, 1]
324
324
 
325
325
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -390,7 +390,7 @@ class CogView3PlusPipeline(DiffusionPipeline):
390
390
  return self._guidance_scale
391
391
 
392
392
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
393
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
393
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
394
394
  # corresponds to doing no classifier free guidance.
395
395
  @property
396
396
  def do_classifier_free_guidance(self):
@@ -453,11 +453,11 @@ class CogView3PlusPipeline(DiffusionPipeline):
453
453
  in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
454
454
  passed will be used. Must be in descending order.
455
455
  guidance_scale (`float`, *optional*, defaults to `5.0`):
456
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
457
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
458
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
459
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
460
- usually at the expense of lower image quality.
456
+ Guidance scale as defined in [Classifier-Free Diffusion
457
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
458
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
459
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
460
+ the text `prompt`, usually at the expense of lower image quality.
461
461
  num_images_per_prompt (`int`, *optional*, defaults to `1`):
462
462
  The number of images to generate per prompt.
463
463
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -547,7 +547,7 @@ class CogView3PlusPipeline(DiffusionPipeline):
547
547
  device = self._execution_device
548
548
 
549
549
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
550
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
550
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
551
551
  # corresponds to doing no classifier free guidance.
552
552
  do_classifier_free_guidance = guidance_scale > 1.0
553
553
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
1
+ # Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
2
2
  # All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -377,7 +377,7 @@ class CogView4Pipeline(DiffusionPipeline, CogView4LoraLoaderMixin):
377
377
  return self._guidance_scale
378
378
 
379
379
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
380
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
380
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
381
381
  # corresponds to doing no classifier free guidance.
382
382
  @property
383
383
  def do_classifier_free_guidance(self):
@@ -453,11 +453,11 @@ class CogView4Pipeline(DiffusionPipeline, CogView4LoraLoaderMixin):
453
453
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
454
454
  will be used.
455
455
  guidance_scale (`float`, *optional*, defaults to `5.0`):
456
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
457
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
458
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
459
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
460
- usually at the expense of lower image quality.
456
+ Guidance scale as defined in [Classifier-Free Diffusion
457
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
458
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
459
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
460
+ the text `prompt`, usually at the expense of lower image quality.
461
461
  num_images_per_prompt (`int`, *optional*, defaults to `1`):
462
462
  The number of images to generate per prompt.
463
463
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
1
+ # Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
2
2
  # All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -409,7 +409,7 @@ class CogView4ControlPipeline(DiffusionPipeline):
409
409
  return self._guidance_scale
410
410
 
411
411
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
412
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
412
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
413
413
  # corresponds to doing no classifier free guidance.
414
414
  @property
415
415
  def do_classifier_free_guidance(self):
@@ -486,11 +486,11 @@ class CogView4ControlPipeline(DiffusionPipeline):
486
486
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
487
487
  will be used.
488
488
  guidance_scale (`float`, *optional*, defaults to `5.0`):
489
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
490
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
491
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
492
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
493
- usually at the expense of lower image quality.
489
+ Guidance scale as defined in [Classifier-Free Diffusion
490
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
491
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
492
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
493
+ the text `prompt`, usually at the expense of lower image quality.
494
494
  num_images_per_prompt (`int`, *optional*, defaults to `1`):
495
495
  The number of images to generate per prompt.
496
496
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -166,7 +166,7 @@ def process_face_embeddings(
166
166
  raise RuntimeError("facexlib align face fail")
167
167
  align_face = face_helper_1.cropped_faces[0] # (512, 512, 3) # RGB
168
168
 
169
- # incase insightface didn't detect face
169
+ # in case insightface didn't detect face
170
170
  if id_ante_embedding is None:
171
171
  logger.warning("Failed to detect face using insightface. Extracting embedding with align face")
172
172
  id_ante_embedding = face_helper_2.get_feat(align_face)
@@ -294,7 +294,7 @@ def prepare_face_models(model_path, device, dtype):
294
294
 
295
295
  Parameters:
296
296
  - model_path: Path to the directory containing model files.
297
- - device: The device (e.g., 'cuda', 'cpu') where models will be loaded.
297
+ - device: The device (e.g., 'cuda', 'xpu', 'cpu') where models will be loaded.
298
298
  - dtype: Data type (e.g., torch.float32) for model inference.
299
299
 
300
300
  Returns:
@@ -1,4 +1,4 @@
1
- # Copyright 2024 ConsisID Authors and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 ConsisID Authors and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -540,7 +540,7 @@ class ConsisIDPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
540
540
  def prepare_extra_step_kwargs(self, generator, eta):
541
541
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
542
542
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
543
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
543
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
544
544
  # and should be between [0, 1]
545
545
 
546
546
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -715,11 +715,11 @@ class ConsisIDPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
715
715
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
716
716
  expense of slower inference.
717
717
  guidance_scale (`float`, *optional*, defaults to 6):
718
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
719
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
720
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
721
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
722
- usually at the expense of lower image quality.
718
+ Guidance scale as defined in [Classifier-Free Diffusion
719
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
720
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
721
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
722
+ the text `prompt`, usually at the expense of lower image quality.
723
723
  use_dynamic_cfg (`bool`, *optional*, defaults to `False`):
724
724
  If True, dynamically adjusts the guidance scale during inference. This allows the model to use a
725
725
  progressive guidance scale, improving the balance between text-guided generation and image quality over
@@ -821,7 +821,7 @@ class ConsisIDPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
821
821
  device = self._execution_device
822
822
 
823
823
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
824
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
824
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
825
825
  # corresponds to doing no classifier free guidance.
826
826
  do_classifier_free_guidance = guidance_scale > 1.0
827
827
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -37,7 +37,7 @@ from ...utils import (
37
37
  scale_lora_layers,
38
38
  unscale_lora_layers,
39
39
  )
40
- from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
40
+ from ...utils.torch_utils import empty_device_cache, is_compiled_module, is_torch_version, randn_tensor
41
41
  from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
42
42
  from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
43
43
  from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -579,7 +579,7 @@ class StableDiffusionControlNetPipeline(
579
579
  def prepare_extra_step_kwargs(self, generator, eta):
580
580
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
581
581
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
582
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
582
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
583
583
  # and should be between [0, 1]
584
584
 
585
585
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -886,7 +886,7 @@ class StableDiffusionControlNetPipeline(
886
886
  return self._clip_skip
887
887
 
888
888
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
889
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
889
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
890
890
  # corresponds to doing no classifier free guidance.
891
891
  @property
892
892
  def do_classifier_free_guidance(self):
@@ -979,8 +979,8 @@ class StableDiffusionControlNetPipeline(
979
979
  num_images_per_prompt (`int`, *optional*, defaults to 1):
980
980
  The number of images to generate per prompt.
981
981
  eta (`float`, *optional*, defaults to 0.0):
982
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
983
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
982
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
983
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
984
984
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
985
985
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
986
986
  generation deterministic.
@@ -1339,7 +1339,7 @@ class StableDiffusionControlNetPipeline(
1339
1339
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
1340
1340
  self.unet.to("cpu")
1341
1341
  self.controlnet.to("cpu")
1342
- torch.cuda.empty_cache()
1342
+ empty_device_cache()
1343
1343
 
1344
1344
  if not output_type == "latent":
1345
1345
  image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
@@ -1,5 +1,5 @@
1
- # Copyright 2024 Salesforce.com, inc.
2
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Salesforce.com, inc.
2
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
5
  # you may not use this file except in compliance with the License.
@@ -149,7 +149,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
149
149
  def get_query_embeddings(self, input_image, src_subject):
150
150
  return self.qformer(image_input=input_image, text_input=src_subject, return_dict=False)
151
151
 
152
- # from the original Blip Diffusion code, speciefies the target subject and augments the prompt by repeating it
152
+ # from the original Blip Diffusion code, specifies the target subject and augments the prompt by repeating it
153
153
  def _build_prompt(self, prompts, tgt_subjects, prompt_strength=1.0, prompt_reps=20):
154
154
  rv = []
155
155
  for prompt, tgt_subject in zip(prompts, tgt_subjects):
@@ -280,11 +280,11 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
280
280
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
281
281
  tensor will ge generated by random sampling.
282
282
  guidance_scale (`float`, *optional*, defaults to 7.5):
283
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
284
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
285
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
286
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
287
- usually at the expense of lower image quality.
283
+ Guidance scale as defined in [Classifier-Free Diffusion
284
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
285
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
286
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
287
+ the text `prompt`, usually at the expense of lower image quality.
288
288
  height (`int`, *optional*, defaults to 512):
289
289
  The height of the generated image.
290
290
  width (`int`, *optional*, defaults to 512):
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -36,7 +36,7 @@ from ...utils import (
36
36
  scale_lora_layers,
37
37
  unscale_lora_layers,
38
38
  )
39
- from ...utils.torch_utils import is_compiled_module, randn_tensor
39
+ from ...utils.torch_utils import empty_device_cache, is_compiled_module, randn_tensor
40
40
  from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
41
41
  from ..stable_diffusion import StableDiffusionPipelineOutput
42
42
  from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -557,7 +557,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
557
557
  def prepare_extra_step_kwargs(self, generator, eta):
558
558
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
559
559
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
560
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
560
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
561
561
  # and should be between [0, 1]
562
562
 
563
563
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -884,7 +884,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
884
884
  return self._clip_skip
885
885
 
886
886
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
887
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
887
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
888
888
  # corresponds to doing no classifier free guidance.
889
889
  @property
890
890
  def do_classifier_free_guidance(self):
@@ -977,8 +977,8 @@ class StableDiffusionControlNetImg2ImgPipeline(
977
977
  num_images_per_prompt (`int`, *optional*, defaults to 1):
978
978
  The number of images to generate per prompt.
979
979
  eta (`float`, *optional*, defaults to 0.0):
980
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
981
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
980
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
981
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
982
982
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
983
983
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
984
984
  generation deterministic.
@@ -1311,7 +1311,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
1311
1311
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
1312
1312
  self.unet.to("cpu")
1313
1313
  self.controlnet.to("cpu")
1314
- torch.cuda.empty_cache()
1314
+ empty_device_cache()
1315
1315
 
1316
1316
  if not output_type == "latent":
1317
1317
  image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -38,7 +38,7 @@ from ...utils import (
38
38
  scale_lora_layers,
39
39
  unscale_lora_layers,
40
40
  )
41
- from ...utils.torch_utils import is_compiled_module, randn_tensor
41
+ from ...utils.torch_utils import empty_device_cache, is_compiled_module, randn_tensor
42
42
  from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
43
43
  from ..stable_diffusion import StableDiffusionPipelineOutput
44
44
  from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -566,7 +566,7 @@ class StableDiffusionControlNetInpaintPipeline(
566
566
  def prepare_extra_step_kwargs(self, generator, eta):
567
567
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
568
568
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
569
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
569
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
570
570
  # and should be between [0, 1]
571
571
 
572
572
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -976,7 +976,7 @@ class StableDiffusionControlNetInpaintPipeline(
976
976
  return self._clip_skip
977
977
 
978
978
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
979
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
979
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
980
980
  # corresponds to doing no classifier free guidance.
981
981
  @property
982
982
  def do_classifier_free_guidance(self):
@@ -1089,8 +1089,8 @@ class StableDiffusionControlNetInpaintPipeline(
1089
1089
  num_images_per_prompt (`int`, *optional*, defaults to 1):
1090
1090
  The number of images to generate per prompt.
1091
1091
  eta (`float`, *optional*, defaults to 0.0):
1092
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
1093
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
1092
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
1093
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
1094
1094
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
1095
1095
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
1096
1096
  generation deterministic.
@@ -1500,7 +1500,7 @@ class StableDiffusionControlNetInpaintPipeline(
1500
1500
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
1501
1501
  self.unet.to("cpu")
1502
1502
  self.controlnet.to("cpu")
1503
- torch.cuda.empty_cache()
1503
+ empty_device_cache()
1504
1504
 
1505
1505
  if not output_type == "latent":
1506
1506
  image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[