diffusers 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (551)
  1. diffusers/__init__.py +145 -1
  2. diffusers/callbacks.py +35 -0
  3. diffusers/commands/__init__.py +1 -1
  4. diffusers/commands/custom_blocks.py +134 -0
  5. diffusers/commands/diffusers_cli.py +3 -1
  6. diffusers/commands/env.py +1 -1
  7. diffusers/commands/fp16_safetensors.py +2 -2
  8. diffusers/configuration_utils.py +11 -2
  9. diffusers/dependency_versions_check.py +1 -1
  10. diffusers/dependency_versions_table.py +3 -3
  11. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  12. diffusers/guiders/__init__.py +41 -0
  13. diffusers/guiders/adaptive_projected_guidance.py +188 -0
  14. diffusers/guiders/auto_guidance.py +190 -0
  15. diffusers/guiders/classifier_free_guidance.py +141 -0
  16. diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
  17. diffusers/guiders/frequency_decoupled_guidance.py +327 -0
  18. diffusers/guiders/guider_utils.py +309 -0
  19. diffusers/guiders/perturbed_attention_guidance.py +271 -0
  20. diffusers/guiders/skip_layer_guidance.py +262 -0
  21. diffusers/guiders/smoothed_energy_guidance.py +251 -0
  22. diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
  23. diffusers/hooks/__init__.py +17 -0
  24. diffusers/hooks/_common.py +56 -0
  25. diffusers/hooks/_helpers.py +293 -0
  26. diffusers/hooks/faster_cache.py +9 -8
  27. diffusers/hooks/first_block_cache.py +259 -0
  28. diffusers/hooks/group_offloading.py +332 -227
  29. diffusers/hooks/hooks.py +58 -3
  30. diffusers/hooks/layer_skip.py +263 -0
  31. diffusers/hooks/layerwise_casting.py +5 -10
  32. diffusers/hooks/pyramid_attention_broadcast.py +15 -12
  33. diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
  34. diffusers/hooks/utils.py +43 -0
  35. diffusers/image_processor.py +7 -2
  36. diffusers/loaders/__init__.py +10 -0
  37. diffusers/loaders/ip_adapter.py +260 -18
  38. diffusers/loaders/lora_base.py +261 -127
  39. diffusers/loaders/lora_conversion_utils.py +657 -35
  40. diffusers/loaders/lora_pipeline.py +2778 -1246
  41. diffusers/loaders/peft.py +78 -112
  42. diffusers/loaders/single_file.py +2 -2
  43. diffusers/loaders/single_file_model.py +64 -15
  44. diffusers/loaders/single_file_utils.py +395 -7
  45. diffusers/loaders/textual_inversion.py +3 -2
  46. diffusers/loaders/transformer_flux.py +10 -11
  47. diffusers/loaders/transformer_sd3.py +8 -3
  48. diffusers/loaders/unet.py +24 -21
  49. diffusers/loaders/unet_loader_utils.py +6 -3
  50. diffusers/loaders/utils.py +1 -1
  51. diffusers/models/__init__.py +23 -1
  52. diffusers/models/activations.py +5 -5
  53. diffusers/models/adapter.py +2 -3
  54. diffusers/models/attention.py +488 -7
  55. diffusers/models/attention_dispatch.py +1218 -0
  56. diffusers/models/attention_flax.py +10 -10
  57. diffusers/models/attention_processor.py +113 -667
  58. diffusers/models/auto_model.py +49 -12
  59. diffusers/models/autoencoders/__init__.py +2 -0
  60. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  61. diffusers/models/autoencoders/autoencoder_dc.py +17 -4
  62. diffusers/models/autoencoders/autoencoder_kl.py +5 -5
  63. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  64. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  65. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
  66. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  67. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  68. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  69. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  70. diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
  71. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  72. diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
  73. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  74. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  75. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  76. diffusers/models/autoencoders/vae.py +13 -2
  77. diffusers/models/autoencoders/vq_model.py +2 -2
  78. diffusers/models/cache_utils.py +32 -10
  79. diffusers/models/controlnet.py +1 -1
  80. diffusers/models/controlnet_flux.py +1 -1
  81. diffusers/models/controlnet_sd3.py +1 -1
  82. diffusers/models/controlnet_sparsectrl.py +1 -1
  83. diffusers/models/controlnets/__init__.py +1 -0
  84. diffusers/models/controlnets/controlnet.py +3 -3
  85. diffusers/models/controlnets/controlnet_flax.py +1 -1
  86. diffusers/models/controlnets/controlnet_flux.py +21 -20
  87. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  88. diffusers/models/controlnets/controlnet_sana.py +290 -0
  89. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  90. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  91. diffusers/models/controlnets/controlnet_union.py +5 -5
  92. diffusers/models/controlnets/controlnet_xs.py +7 -7
  93. diffusers/models/controlnets/multicontrolnet.py +4 -5
  94. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  95. diffusers/models/downsampling.py +2 -2
  96. diffusers/models/embeddings.py +36 -46
  97. diffusers/models/embeddings_flax.py +2 -2
  98. diffusers/models/lora.py +3 -3
  99. diffusers/models/model_loading_utils.py +233 -1
  100. diffusers/models/modeling_flax_utils.py +1 -2
  101. diffusers/models/modeling_utils.py +203 -108
  102. diffusers/models/normalization.py +4 -4
  103. diffusers/models/resnet.py +2 -2
  104. diffusers/models/resnet_flax.py +1 -1
  105. diffusers/models/transformers/__init__.py +7 -0
  106. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  107. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  108. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  109. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  110. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  111. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  112. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  113. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  114. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  115. diffusers/models/transformers/prior_transformer.py +1 -1
  116. diffusers/models/transformers/sana_transformer.py +8 -3
  117. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  118. diffusers/models/transformers/t5_film_transformer.py +3 -3
  119. diffusers/models/transformers/transformer_2d.py +1 -1
  120. diffusers/models/transformers/transformer_allegro.py +1 -1
  121. diffusers/models/transformers/transformer_chroma.py +641 -0
  122. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  123. diffusers/models/transformers/transformer_cogview4.py +353 -27
  124. diffusers/models/transformers/transformer_cosmos.py +586 -0
  125. diffusers/models/transformers/transformer_flux.py +376 -138
  126. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  127. diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
  128. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  129. diffusers/models/transformers/transformer_ltx.py +105 -24
  130. diffusers/models/transformers/transformer_lumina2.py +1 -1
  131. diffusers/models/transformers/transformer_mochi.py +1 -1
  132. diffusers/models/transformers/transformer_omnigen.py +2 -2
  133. diffusers/models/transformers/transformer_qwenimage.py +645 -0
  134. diffusers/models/transformers/transformer_sd3.py +7 -7
  135. diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
  136. diffusers/models/transformers/transformer_temporal.py +1 -1
  137. diffusers/models/transformers/transformer_wan.py +316 -87
  138. diffusers/models/transformers/transformer_wan_vace.py +387 -0
  139. diffusers/models/unets/unet_1d.py +1 -1
  140. diffusers/models/unets/unet_1d_blocks.py +1 -1
  141. diffusers/models/unets/unet_2d.py +1 -1
  142. diffusers/models/unets/unet_2d_blocks.py +1 -1
  143. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  144. diffusers/models/unets/unet_2d_condition.py +4 -3
  145. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  146. diffusers/models/unets/unet_3d_blocks.py +1 -1
  147. diffusers/models/unets/unet_3d_condition.py +3 -3
  148. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  149. diffusers/models/unets/unet_kandinsky3.py +1 -1
  150. diffusers/models/unets/unet_motion_model.py +2 -2
  151. diffusers/models/unets/unet_stable_cascade.py +1 -1
  152. diffusers/models/upsampling.py +2 -2
  153. diffusers/models/vae_flax.py +2 -2
  154. diffusers/models/vq_model.py +1 -1
  155. diffusers/modular_pipelines/__init__.py +83 -0
  156. diffusers/modular_pipelines/components_manager.py +1068 -0
  157. diffusers/modular_pipelines/flux/__init__.py +66 -0
  158. diffusers/modular_pipelines/flux/before_denoise.py +689 -0
  159. diffusers/modular_pipelines/flux/decoders.py +109 -0
  160. diffusers/modular_pipelines/flux/denoise.py +227 -0
  161. diffusers/modular_pipelines/flux/encoders.py +412 -0
  162. diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
  163. diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
  164. diffusers/modular_pipelines/modular_pipeline.py +2446 -0
  165. diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
  166. diffusers/modular_pipelines/node_utils.py +665 -0
  167. diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
  168. diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
  169. diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
  170. diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
  171. diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
  172. diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
  173. diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
  174. diffusers/modular_pipelines/wan/__init__.py +66 -0
  175. diffusers/modular_pipelines/wan/before_denoise.py +365 -0
  176. diffusers/modular_pipelines/wan/decoders.py +105 -0
  177. diffusers/modular_pipelines/wan/denoise.py +261 -0
  178. diffusers/modular_pipelines/wan/encoders.py +242 -0
  179. diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
  180. diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
  181. diffusers/pipelines/__init__.py +68 -6
  182. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  183. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  184. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  185. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  186. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  187. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  188. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  189. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  190. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  191. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  192. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  193. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  194. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
  195. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  196. diffusers/pipelines/auto_pipeline.py +23 -20
  197. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  198. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  199. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  200. diffusers/pipelines/chroma/__init__.py +49 -0
  201. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  202. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  203. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  204. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
  205. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
  206. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
  207. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
  208. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  209. diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
  210. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  211. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  212. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  213. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  214. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  215. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
  216. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  217. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  218. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  219. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  220. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  221. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
  222. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
  223. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
  224. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  225. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  226. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  227. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  228. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  229. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  230. diffusers/pipelines/cosmos/__init__.py +54 -0
  231. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  232. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  233. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  234. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  235. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  236. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  237. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  238. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  239. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  240. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  241. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  242. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  243. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  244. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  245. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  246. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  247. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  248. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  249. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  250. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  251. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  252. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  253. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  254. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  255. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  256. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  257. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
  258. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  259. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  260. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  261. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  262. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  263. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  264. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  265. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  266. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  267. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  268. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  269. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  270. diffusers/pipelines/dit/pipeline_dit.py +4 -2
  271. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  272. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  273. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  274. diffusers/pipelines/flux/__init__.py +4 -0
  275. diffusers/pipelines/flux/modeling_flux.py +1 -1
  276. diffusers/pipelines/flux/pipeline_flux.py +37 -36
  277. diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
  278. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
  279. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
  280. diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
  281. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
  282. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
  283. diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
  284. diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
  285. diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
  286. diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
  287. diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
  288. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
  289. diffusers/pipelines/flux/pipeline_output.py +6 -4
  290. diffusers/pipelines/free_init_utils.py +2 -2
  291. diffusers/pipelines/free_noise_utils.py +3 -3
  292. diffusers/pipelines/hidream_image/__init__.py +47 -0
  293. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  294. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  295. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  296. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  297. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
  298. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  299. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  300. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  301. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  302. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  303. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  304. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  305. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  306. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  307. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  308. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  309. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  310. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  311. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  312. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  313. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  314. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  315. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  316. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  317. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  318. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  319. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  320. diffusers/pipelines/kolors/text_encoder.py +3 -3
  321. diffusers/pipelines/kolors/tokenizer.py +1 -1
  322. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  323. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  324. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  325. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  326. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  327. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  328. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  329. diffusers/pipelines/ltx/__init__.py +4 -0
  330. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  331. diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
  332. diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
  333. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
  334. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  335. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  336. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  337. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  338. diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
  339. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  340. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  341. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  342. diffusers/pipelines/onnx_utils.py +15 -2
  343. diffusers/pipelines/pag/pag_utils.py +2 -2
  344. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  345. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  346. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  347. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  348. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  349. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  350. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  351. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  352. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  353. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  354. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  355. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  356. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  357. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  358. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  359. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  360. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  361. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  362. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  363. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  364. diffusers/pipelines/pipeline_flax_utils.py +5 -6
  365. diffusers/pipelines/pipeline_loading_utils.py +113 -15
  366. diffusers/pipelines/pipeline_utils.py +127 -48
  367. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
  368. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
  369. diffusers/pipelines/qwenimage/__init__.py +55 -0
  370. diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
  371. diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
  372. diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
  373. diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
  374. diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
  375. diffusers/pipelines/sana/__init__.py +4 -0
  376. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  377. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  378. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  379. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  380. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  381. diffusers/pipelines/shap_e/camera.py +1 -1
  382. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  383. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  384. diffusers/pipelines/shap_e/renderer.py +3 -3
  385. diffusers/pipelines/skyreels_v2/__init__.py +59 -0
  386. diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
  387. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
  388. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
  389. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
  390. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
  391. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
  392. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  393. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  394. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  395. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  396. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  397. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  398. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  399. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  400. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  401. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  402. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  403. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
  404. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  405. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
  406. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
  407. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
  408. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  409. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  410. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  411. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  412. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  413. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  414. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  415. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  416. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  417. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  418. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  419. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  420. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
  421. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  422. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  423. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  424. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  425. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  426. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  427. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  428. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  429. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  430. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  431. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  432. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  433. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  434. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  435. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  436. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  437. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  438. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  439. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  440. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  441. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  442. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  443. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  444. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  445. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  446. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  447. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  448. diffusers/pipelines/unclip/text_proj.py +2 -2
  449. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  450. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  451. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  452. diffusers/pipelines/visualcloze/__init__.py +52 -0
  453. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  454. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  455. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  456. diffusers/pipelines/wan/__init__.py +2 -0
  457. diffusers/pipelines/wan/pipeline_wan.py +91 -30
  458. diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
  459. diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
  460. diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
  461. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  462. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  463. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  464. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  465. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  466. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  467. diffusers/quantizers/__init__.py +3 -1
  468. diffusers/quantizers/base.py +17 -1
  469. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  470. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  471. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  472. diffusers/quantizers/gguf/utils.py +108 -16
  473. diffusers/quantizers/pipe_quant_config.py +202 -0
  474. diffusers/quantizers/quantization_config.py +18 -16
  475. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  476. diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
  477. diffusers/schedulers/__init__.py +3 -1
  478. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  479. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  480. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  481. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  482. diffusers/schedulers/scheduling_ddim.py +8 -8
  483. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  484. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  485. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  486. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  487. diffusers/schedulers/scheduling_ddpm.py +9 -9
  488. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  489. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  490. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  491. diffusers/schedulers/scheduling_deis_multistep.py +16 -9
  492. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  493. diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
  494. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  495. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  496. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  497. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
  498. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  499. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  500. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  501. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  502. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  503. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  504. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  505. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  506. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  507. diffusers/schedulers/scheduling_ipndm.py +2 -2
  508. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  509. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  510. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  511. diffusers/schedulers/scheduling_lcm.py +3 -3
  512. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  513. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  514. diffusers/schedulers/scheduling_pndm.py +4 -4
  515. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  516. diffusers/schedulers/scheduling_repaint.py +9 -9
  517. diffusers/schedulers/scheduling_sasolver.py +15 -15
  518. diffusers/schedulers/scheduling_scm.py +1 -2
  519. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  520. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  521. diffusers/schedulers/scheduling_tcd.py +3 -3
  522. diffusers/schedulers/scheduling_unclip.py +5 -5
  523. diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
  524. diffusers/schedulers/scheduling_utils.py +3 -3
  525. diffusers/schedulers/scheduling_utils_flax.py +2 -2
  526. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  527. diffusers/training_utils.py +91 -5
  528. diffusers/utils/__init__.py +15 -0
  529. diffusers/utils/accelerate_utils.py +1 -1
  530. diffusers/utils/constants.py +4 -0
  531. diffusers/utils/doc_utils.py +1 -1
  532. diffusers/utils/dummy_pt_objects.py +432 -0
  533. diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
  534. diffusers/utils/dynamic_modules_utils.py +85 -8
  535. diffusers/utils/export_utils.py +1 -1
  536. diffusers/utils/hub_utils.py +33 -17
  537. diffusers/utils/import_utils.py +151 -18
  538. diffusers/utils/logging.py +1 -1
  539. diffusers/utils/outputs.py +2 -1
  540. diffusers/utils/peft_utils.py +96 -10
  541. diffusers/utils/state_dict_utils.py +20 -3
  542. diffusers/utils/testing_utils.py +195 -17
  543. diffusers/utils/torch_utils.py +43 -5
  544. diffusers/video_processor.py +2 -2
  545. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
  546. diffusers-0.35.0.dist-info/RECORD +703 -0
  547. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
  548. diffusers-0.33.1.dist-info/RECORD +0 -608
  549. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
  550. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
  551. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
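Beyond the line-count summary, the list above introduces whole new subsystems (guiders, modular_pipelines, attention_dispatch) and pipelines (Chroma, Cosmos, HiDream, QwenImage, SkyReels-V2, Flux Kontext, Wan VACE). As a rough orientation, a minimal sketch of loading one of the newly added pipelines; the class name follows the new pipeline_flux_kontext.py module, while the checkpoint ID, dtype, and call arguments are assumptions for illustration and are not taken from this diff.

    # Sketch of trying one of the pipelines added in 0.35.0; checkpoint ID, dtype,
    # and call arguments are assumptions for illustration.
    import torch
    from diffusers import FluxKontextPipeline
    from diffusers.utils import load_image

    pipe = FluxKontextPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-Kontext-dev",  # assumed checkpoint ID
        torch_dtype=torch.bfloat16,
    )
    pipe.enable_model_cpu_offload()  # the offload helpers also appear further down in this diff

    image = load_image("path/to/input.png")  # placeholder input image
    result = pipe(image=image, prompt="make the sky stormy", guidance_scale=2.5).images[0]
    result.save("kontext_edit.png")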
@@ -1,4 +1,4 @@
- # Copyright 2024 Stability AI and The HuggingFace Team. All rights reserved.
+ # Copyright 2025 Stability AI and The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
- # Copyright 2024 Stability AI and The HuggingFace Team. All rights reserved.
+ # Copyright 2025 Stability AI and The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -306,7 +306,7 @@ class StableAudioPipeline(DiffusionPipeline):
  def prepare_extra_step_kwargs(self, generator, eta):
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
  # and should be between [0, 1]

  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -526,8 +526,8 @@ class StableAudioPipeline(DiffusionPipeline):
  num_waveforms_per_prompt (`int`, *optional*, defaults to 1):
  The number of waveforms to generate per prompt.
  eta (`float`, *optional*, defaults to 0.0):
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
  generation deterministic.
@@ -616,7 +616,7 @@ class StableAudioPipeline(DiffusionPipeline):

  device = self._execution_device
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
  # corresponds to doing no classifier free guidance.
  do_classifier_free_guidance = guidance_scale > 1.0

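The StableAudioPipeline hunks above only relink the guidance comments, but for readers following them: a minimal generic sketch of the classifier-free guidance combination those comments describe (the Imagen weight `w`), not code from this package.

    import torch

    def apply_cfg(noise_uncond: torch.Tensor, noise_text: torch.Tensor, guidance_scale: float) -> torch.Tensor:
        # guidance_scale (w) = 1 reduces to the conditional prediction, i.e. no classifier-free
        # guidance, which is why the pipelines gate CFG on `guidance_scale > 1.0`.
        return noise_uncond + guidance_scale * (noise_text - noise_uncond)

    noise_uncond = torch.zeros(1, 4, 8, 8)
    noise_text = torch.ones(1, 4, 8, 8)
    print(apply_cfg(noise_uncond, noise_text, guidance_scale=7.5).mean())  # tensor(7.5000)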
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -332,11 +332,11 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference.
  guidance_scale (`float`, *optional*, defaults to 0.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `decoder_guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting
- `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely
- linked to the text `prompt`, usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `decoder_guidance_scale` is defined as `w` of
+ equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
+ setting `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
+ closely linked to the text `prompt`, usually at the expense of lower image quality.
  negative_prompt (`str` or `List[str]`, *optional*):
  The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
  if `decoder_guidance_scale` is less than `1`).
@@ -524,9 +524,9 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
  latents = self.vqgan.config.scale_factor * latents
  images = self.vqgan.decode(latents).sample.clamp(0, 1)
  if output_type == "np":
- images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesnt work
+ images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesn't work
  elif output_type == "pil":
- images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesnt work
+ images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesn't work
  images = self.numpy_to_pil(images)
  else:
  images = latents
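The `float() as bfloat16 -> numpy doesn't work` comments corrected above refer to NumPy having no bfloat16 dtype, so tensors are upcast before `.numpy()`. A small standalone illustration (not pipeline code):

    import torch

    x = torch.rand(2, 2, dtype=torch.bfloat16)
    try:
        x.numpy()  # raises TypeError: NumPy does not define a bfloat16 dtype
    except TypeError as err:
        print(err)

    arr = x.float().numpy()  # upcast to float32 first, as the pipelines do
    print(arr.dtype)  # float32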
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -125,7 +125,7 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
  def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
  self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)

- def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
  r"""
  Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
  to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
@@ -135,7 +135,7 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
  self.prior_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
  self.decoder_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)

- def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+ def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
  r"""
  Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
  Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
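With the `device` default switching from `"cuda"` to `None`, the offload helpers can presumably resolve the accelerator themselves; a usage sketch under that assumption (the checkpoint ID and the automatic device resolution are assumptions, not shown in this hunk):

    import torch
    from diffusers import StableCascadeCombinedPipeline

    pipe = StableCascadeCombinedPipeline.from_pretrained(
        "stabilityai/stable-cascade",  # assumed checkpoint ID
        torch_dtype=torch.bfloat16,
    )
    # No explicit device: with device=None the helper is expected to pick CUDA, MPS,
    # or XPU depending on what is available (assumption implied by the new default).
    pipe.enable_model_cpu_offload()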
@@ -212,11 +212,11 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
  width (`int`, *optional*, defaults to 512):
  The width in pixels of the generated image.
  prior_guidance_scale (`float`, *optional*, defaults to 4.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `prior_guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting
- `prior_guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked
- to the text `prompt`, usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `prior_guidance_scale` is defined as `w` of
+ equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
+ setting `prior_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
+ closely linked to the text `prompt`, usually at the expense of lower image quality.
  prior_num_inference_steps (`Union[int, Dict[float, int]]`, *optional*, defaults to 60):
  The number of prior denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference. For more specific timestep spacing, you can pass customized
@@ -226,11 +226,11 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
  the expense of slower inference. For more specific timestep spacing, you can pass customized
  `timesteps`
  decoder_guidance_scale (`float`, *optional*, defaults to 0.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
  to make generation deterministic.
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -409,11 +409,11 @@ class StableCascadePriorPipeline(DiffusionPipeline):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference.
  guidance_scale (`float`, *optional*, defaults to 8.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `decoder_guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting
- `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely
- linked to the text `prompt`, usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `decoder_guidance_scale` is defined as `w` of
+ equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
+ setting `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
+ closely linked to the text `prompt`, usually at the expense of lower image quality.
  negative_prompt (`str` or `List[str]`, *optional*):
  The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
  if `decoder_guidance_scale` is less than `1`).
@@ -626,11 +626,11 @@ class StableCascadePriorPipeline(DiffusionPipeline):
  self.maybe_free_model_hooks()

  if output_type == "np":
- latents = latents.cpu().float().numpy() # float() as bfloat16-> numpy doesnt work
- prompt_embeds = prompt_embeds.cpu().float().numpy() # float() as bfloat16-> numpy doesnt work
+ latents = latents.cpu().float().numpy() # float() as bfloat16-> numpy doesn't work
+ prompt_embeds = prompt_embeds.cpu().float().numpy() # float() as bfloat16-> numpy doesn't work
  negative_prompt_embeds = (
  negative_prompt_embeds.cpu().float().numpy() if negative_prompt_embeds is not None else None
- ) # float() as bfloat16-> numpy doesnt work
+ ) # float() as bfloat16-> numpy doesn't work

  if not return_dict:
  return (
@@ -30,18 +30,11 @@ except OptionalDependencyNotAvailable:
  _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
  else:
  _import_structure["clip_image_project_model"] = ["CLIPImageProjection"]
- _import_structure["pipeline_cycle_diffusion"] = ["CycleDiffusionPipeline"]
  _import_structure["pipeline_stable_diffusion"] = ["StableDiffusionPipeline"]
- _import_structure["pipeline_stable_diffusion_attend_and_excite"] = ["StableDiffusionAttendAndExcitePipeline"]
- _import_structure["pipeline_stable_diffusion_gligen"] = ["StableDiffusionGLIGENPipeline"]
- _import_structure["pipeline_stable_diffusion_gligen_text_image"] = ["StableDiffusionGLIGENTextImagePipeline"]
  _import_structure["pipeline_stable_diffusion_img2img"] = ["StableDiffusionImg2ImgPipeline"]
  _import_structure["pipeline_stable_diffusion_inpaint"] = ["StableDiffusionInpaintPipeline"]
- _import_structure["pipeline_stable_diffusion_inpaint_legacy"] = ["StableDiffusionInpaintPipelineLegacy"]
  _import_structure["pipeline_stable_diffusion_instruct_pix2pix"] = ["StableDiffusionInstructPix2PixPipeline"]
  _import_structure["pipeline_stable_diffusion_latent_upscale"] = ["StableDiffusionLatentUpscalePipeline"]
- _import_structure["pipeline_stable_diffusion_model_editing"] = ["StableDiffusionModelEditingPipeline"]
- _import_structure["pipeline_stable_diffusion_paradigms"] = ["StableDiffusionParadigmsPipeline"]
  _import_structure["pipeline_stable_diffusion_upscale"] = ["StableDiffusionUpscalePipeline"]
  _import_structure["pipeline_stable_unclip"] = ["StableUnCLIPPipeline"]
  _import_structure["pipeline_stable_unclip_img2img"] = ["StableUnCLIPImg2ImgPipeline"]
@@ -1,4 +1,4 @@
- # Copyright 2024 The GLIGEN Authors and HuggingFace Team. All rights reserved.
+ # Copyright 2025 The GLIGEN Authors and HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -53,6 +53,7 @@ from ...schedulers import (
  )
  from ...utils import is_accelerate_available, logging
  from ...utils.constants import DIFFUSERS_REQUEST_TIMEOUT
+ from ...utils.torch_utils import get_device
  from ..latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
  from ..paint_by_example import PaintByExampleImageEncoder
  from ..pipeline_utils import DiffusionPipeline
@@ -350,8 +351,14 @@ def create_vae_diffusers_config(original_config, image_size: int):
  _ = original_config["model"]["params"]["first_stage_config"]["params"]["embed_dim"]

  block_out_channels = [vae_params["ch"] * mult for mult in vae_params["ch_mult"]]
- down_block_types = ["DownEncoderBlock2D"] * len(block_out_channels)
- up_block_types = ["UpDecoderBlock2D"] * len(block_out_channels)
+ down_block_types = [
+ "DownEncoderBlock2D" if image_size // 2**i not in vae_params["attn_resolutions"] else "AttnDownEncoderBlock2D"
+ for i, _ in enumerate(block_out_channels)
+ ]
+ up_block_types = [
+ "UpDecoderBlock2D" if image_size // 2**i not in vae_params["attn_resolutions"] else "AttnUpDecoderBlock2D"
+ for i, _ in enumerate(block_out_channels)
+ ][::-1]

  config = {
  "sample_size": image_size,
@@ -1266,7 +1273,7 @@ def download_from_original_stable_diffusion_ckpt(
  checkpoint = safe_load(checkpoint_path_or_dict, device="cpu")
  else:
  if device is None:
- device = "cuda" if torch.cuda.is_available() else "cpu"
+ device = get_device()
  checkpoint = torch.load(checkpoint_path_or_dict, map_location=device)
  else:
  checkpoint = torch.load(checkpoint_path_or_dict, map_location=device)
@@ -1836,7 +1843,7 @@ def download_controlnet_from_original_ckpt(
  checkpoint[key] = f.get_tensor(key)
  else:
  if device is None:
- device = "cuda" if torch.cuda.is_available() else "cpu"
+ device = get_device()
  checkpoint = torch.load(checkpoint_path, map_location=device)
  else:
  checkpoint = torch.load(checkpoint_path, map_location=device)
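`get_device` comes from the new `...utils.torch_utils` import a few hunks up; its implementation is not part of this diff. A rough sketch of the kind of detection such a helper performs, written as an assumption rather than the actual diffusers code:

    import torch

    def get_device_sketch() -> str:
        # Assumed priority order; the real diffusers helper may differ.
        if torch.cuda.is_available():
            return "cuda"
        if torch.backends.mps.is_available():
            return "mps"
        if hasattr(torch, "xpu") and torch.xpu.is_available():
            return "xpu"
        return "cpu"

    print(get_device_sketch())  # e.g. "cuda" on a CUDA box, "cpu" otherwise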
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -294,11 +294,11 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference.
  guidance_scale (`float`, *optional*, defaults to 7.5):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  negative_prompt (`str` or `List[str]`, *optional*):
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
  `negative_prompt_embeds`. instead. Ignored when not using guidance (i.e., ignored if `guidance_scale`
@@ -306,8 +306,8 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  eta (`float`, *optional*, defaults to 0.0):
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
- [`schedulers.DDIMScheduler`], will be ignored for others.
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
  generator (`np.random.RandomState`, *optional*):
  One or a list of [numpy generator(s)](TODO) to make generation deterministic.
  latents (`np.ndarray`, *optional*):
@@ -359,7 +359,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
  generator = np.random

  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
  # corresponds to doing no classifier free guidance.
  do_classifier_free_guidance = guidance_scale > 1.0

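The repeated docstring edits above all describe the same mechanism: classifier-free guidance is active only when `guidance_scale > 1`, with the scale acting as the guidance weight `w` from the Imagen formulation. A standalone sketch of how that weight combines the unconditional and text-conditioned predictions (toy NumPy arrays, not the pipeline's internals):

```python
import numpy as np


def apply_classifier_free_guidance(noise_pred_uncond, noise_pred_text, guidance_scale):
    # guidance_scale == 1 collapses to the text-conditioned prediction (no extra guidance);
    # larger values push further along the (text - uncond) direction at the cost of diversity.
    return noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)


# Toy shapes only; in the pipeline both predictions come from one batched UNet call.
uncond = np.zeros((1, 4, 64, 64), dtype=np.float32)
text = np.ones((1, 4, 64, 64), dtype=np.float32)
guided = apply_classifier_free_guidance(uncond, text, guidance_scale=7.5)
```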
@@ -383,11 +383,12 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
  # set timesteps
  self.scheduler.set_timesteps(num_inference_steps)

- latents = latents * np.float64(self.scheduler.init_noise_sigma)
+ # scale the initial noise by the standard deviation required by the scheduler
+ latents = latents * self.scheduler.init_noise_sigma

  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
  # and should be between [0, 1]
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
  extra_step_kwargs = {}
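Two recurring details in this hunk: the initial latents are scaled by the scheduler's `init_noise_sigma` (dropping the explicit `np.float64` cast keeps the latents in their original dtype), and `eta` is only forwarded when the scheduler's `step` signature actually accepts it. A sketch of that signature check against a generic scheduler object (the helper name is made up for illustration):

```python
import inspect


def build_extra_step_kwargs(scheduler, eta, generator=None):
    """Forward eta/generator to scheduler.step only if its signature accepts them."""
    step_params = set(inspect.signature(scheduler.step).parameters.keys())
    extra_step_kwargs = {}
    if "eta" in step_params:  # eta (η) is DDIM-specific and should stay in [0, 1]
        extra_step_kwargs["eta"] = eta
    if "generator" in step_params:  # some schedulers take a generator for stochastic steps
        extra_step_kwargs["generator"] = generator
    return extra_step_kwargs
```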
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -348,19 +348,19 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference. This parameter will be modulated by `strength`.
  guidance_scale (`float`, *optional*, defaults to 7.5):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  negative_prompt (`str` or `List[str]`, *optional*):
  The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
  if `guidance_scale` is less than `1`).
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  eta (`float`, *optional*, defaults to 0.0):
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
- [`schedulers.DDIMScheduler`], will be ignored for others.
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
  generator (`np.random.RandomState`, *optional*):
  A np.random.RandomState to make generation deterministic.
  prompt_embeds (`np.ndarray`, *optional*):
@@ -414,7 +414,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
  image = preprocess(image).cpu().numpy()

  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
  # corresponds to doing no classifier free guidance.
  do_classifier_free_guidance = guidance_scale > 1.0

@@ -470,7 +470,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):

  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
  # and should be between [0, 1]
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
  extra_step_kwargs = {}
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -360,19 +360,19 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference.
  guidance_scale (`float`, *optional*, defaults to 7.5):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  negative_prompt (`str` or `List[str]`, *optional*):
  The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
  if `guidance_scale` is less than `1`).
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  eta (`float`, *optional*, defaults to 0.0):
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
- [`schedulers.DDIMScheduler`], will be ignored for others.
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
  generator (`np.random.RandomState`, *optional*):
  A np.random.RandomState to make generation deterministic.
  latents (`np.ndarray`, *optional*):
@@ -427,7 +427,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
  self.scheduler.set_timesteps(num_inference_steps)

  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
  # corresponds to doing no classifier free guidance.
  do_classifier_free_guidance = guidance_scale > 1.0

@@ -483,11 +483,11 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
  self.scheduler.set_timesteps(num_inference_steps)

  # scale the initial noise by the standard deviation required by the scheduler
- latents = latents * np.float64(self.scheduler.init_noise_sigma)
+ latents = latents * self.scheduler.init_noise_sigma

  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
  # and should be between [0, 1]
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
  extra_step_kwargs = {}
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -378,11 +378,11 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference. This parameter will be modulated by `strength`.
  guidance_scale (`float`, *optional*, defaults to 7.5):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  noise_level (`float`, defaults to 0.2):
  Deteremines the amount of noise to add to the initial image before performing upscaling.
  negative_prompt (`str` or `List[str]`, *optional*):
@@ -391,8 +391,8 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  eta (`float`, *optional*, defaults to 0.0):
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
- [`schedulers.DDIMScheduler`], will be ignored for others.
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
  generator (`np.random.RandomState`, *optional*):
  A np.random.RandomState to make generation deterministic.
  latents (`torch.Tensor`, *optional*):
@@ -450,7 +450,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
  generator = np.random

  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
  # corresponds to doing no classifier free guidance.
  do_classifier_free_guidance = guidance_scale > 1.0

@@ -481,7 +481,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
  timesteps = self.scheduler.timesteps

  # Scale the initial noise by the standard deviation required by the scheduler
- latents = latents * np.float64(self.scheduler.init_noise_sigma)
+ latents = latents * self.scheduler.init_noise_sigma

  # 5. Add noise to image
  noise_level = np.array([noise_level]).astype(np.int64)
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -70,7 +70,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
  r"""
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+ Flawed](https://huggingface.co/papers/2305.08891).

  Args:
  noise_cfg (`torch.Tensor`):
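Only the docstring link changes here, but the function it documents is the Section 3.4 fix: rescale the guided prediction so its per-sample standard deviation matches the text-conditioned prediction, then blend by `guidance_rescale`. A rough sketch of that operation, reconstructed from the description above rather than copied from the diff:

```python
import torch


def rescale_noise_cfg_sketch(noise_cfg, noise_pred_text, guidance_rescale=0.0):
    # Per-sample standard deviation over all non-batch dimensions.
    std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
    std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
    # Rescale toward the text-conditioned std, then interpolate to avoid over-correcting.
    rescaled = noise_cfg * (std_text / std_cfg)
    return guidance_rescale * rescaled + (1.0 - guidance_rescale) * noise_cfg
```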
@@ -608,7 +608,7 @@ class StableDiffusionPipeline(
  def prepare_extra_step_kwargs(self, generator, eta):
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
  # and should be between [0, 1]

  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -757,7 +757,7 @@ class StableDiffusionPipeline(
  return self._clip_skip

  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
  # corresponds to doing no classifier free guidance.
  @property
  def do_classifier_free_guidance(self):
@@ -836,8 +836,8 @@ class StableDiffusionPipeline(
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  eta (`float`, *optional*, defaults to 0.0):
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
  generation deterministic.
@@ -867,7 +867,7 @@ class StableDiffusionPipeline(
  [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
  guidance_rescale (`float`, *optional*, defaults to 0.0):
  Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
- Flawed](https://arxiv.org/pdf/2305.08891.pdf). Guidance rescale factor should fix overexposure when
+ Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
  using zero terminal SNR.
  clip_skip (`int`, *optional*):
  Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
@@ -1034,7 +1034,8 @@ class StableDiffusionPipeline(

  # expand the latents if we are doing classifier free guidance
  latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
- latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+ if hasattr(self.scheduler, "scale_model_input"):
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

  # predict the noise residual
  noise_pred = self.unet(
@@ -1053,7 +1053,7 @@ class StableDiffusionPipeline(
  noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)

  if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)

  # compute the previous noisy sample x_t -> x_t-1
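The last two hunks sit inside the denoising loop: `scale_model_input` is now called only when the scheduler exposes it, and the rescale step only fires when `guidance_rescale > 0`. A condensed, standalone sketch of one loop iteration following that structure (duck-typed `unet`/`scheduler` placeholders and the `rescale_noise_cfg_sketch` helper from the earlier example; not the pipeline's full code):

```python
import torch


def denoise_step(unet, scheduler, latents, t, prompt_embeds, guidance_scale, guidance_rescale=0.0):
    do_cfg = guidance_scale > 1.0
    # Duplicate latents so one batched UNet call yields unconditional + text-conditioned predictions.
    latent_model_input = torch.cat([latents] * 2) if do_cfg else latents
    if hasattr(scheduler, "scale_model_input"):  # some schedulers do not define this hook
        latent_model_input = scheduler.scale_model_input(latent_model_input, t)

    noise_pred = unet(latent_model_input, t, encoder_hidden_states=prompt_embeds).sample

    if do_cfg:
        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
        noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
        if guidance_rescale > 0.0:
            noise_pred = rescale_noise_cfg_sketch(noise_pred, noise_pred_text, guidance_rescale)

    # compute the previous noisy sample x_t -> x_{t-1}
    return scheduler.step(noise_pred, t, latents).prev_sample
```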
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -414,7 +414,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
  def prepare_extra_step_kwargs(self, generator, eta):
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
  # and should be between [0, 1]

  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -617,7 +617,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
  return self._clip_skip

  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
  # corresponds to doing no classifier free guidance.
  @property
  def do_classifier_free_guidance(self):
@@ -684,8 +684,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  eta (`float`, *optional*, defaults to 0.0):
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
  generation deterministic.