diffusers-0.32.2-py3-none-any.whl → diffusers-0.33.1-py3-none-any.whl

This diff shows the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +121 -86
  13. diffusers/loaders/lora_conversion_utils.py +504 -44
  14. diffusers/loaders/lora_pipeline.py +1769 -181
  15. diffusers/loaders/peft.py +167 -57
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +646 -72
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +20 -7
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +595 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +724 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +727 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +9 -1
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +2 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/METADATA +21 -4
  384. diffusers-0.33.1.dist-info/RECORD +608 -0
  385. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.2.dist-info/RECORD +0 -550
  387. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/top_level.txt +0 -0
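Beyond the bulk of per-file touch-ups, the list shows the release's headline additions: the new diffusers/hooks/ package (group offloading, layerwise casting, pyramid attention broadcast, FasterCache), models/auto_model.py, utils/remote_utils.py, a Quanto quantizer backend, and new pipelines (CogView4, ConsisID, EasyAnimate, Lumina2, OmniGen, SANA-Sprint, Wan, HunyuanVideo image-to-video). A hedged sketch of how the memory/caching hooks are typically enabled — the method names follow the 0.33 docs, `THUDM/CogVideoX-5b` is only an illustrative checkpoint, and the parameter values are not tuned recommendations:

    import torch

    from diffusers import CogVideoXPipeline, PyramidAttentionBroadcastConfig

    pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)

    # hooks/group_offloading.py: shuttle weights between CPU and GPU in groups
    # rather than per module, optionally overlapped with compute via streams.
    pipe.transformer.enable_group_offload(
        onload_device=torch.device("cuda"),
        offload_device=torch.device("cpu"),
        offload_type="leaf_level",
        use_stream=True,
    )

    # hooks/layerwise_casting.py: store weights in fp8, upcast per layer for compute.
    pipe.vae.enable_layerwise_casting(
        storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16
    )

    # models/cache_utils.py + hooks/pyramid_attention_broadcast.py: reuse
    # attention outputs across nearby denoising steps.
    pipe.transformer.enable_cache(
        PyramidAttentionBroadcastConfig(
            spatial_attention_block_skip_range=2,
            spatial_attention_timestep_skip_range=(100, 800),
            current_timestep_callback=lambda: pipe.current_timestep,
        )
    )

The excerpts that follow cover files 267-283 above; the same few changes recur throughout: a torch_xla guard with a per-step `xm.mark_step()`, None-tolerant component checks, request timeouts, and the runwayml → stable-diffusion-v1-5 repository rename.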

diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py

@@ -9,12 +9,19 @@ from ...image_processor import VaeImageProcessor
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging
+from ...utils import deprecate, is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from .pipeline_output import SemanticStableDiffusionPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -87,7 +94,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
             safety_checker=safety_checker,
             feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)
 
@@ -701,6 +708,9 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                    step_idx = i // getattr(self.scheduler, "order", 1)
                    callback(step_idx, t, latents)
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         # 8. Post-processing
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
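This torch_xla guard, together with the `xm.mark_step()` call inside the denoising loop, is the same change repeated across the pipeline files below. Distilled into a standalone sketch (`denoise` and `unet_step` are hypothetical stand-ins, not diffusers APIs):

    # The guard the 0.33 hunks add at module import time.
    from diffusers.utils import is_torch_xla_available

    if is_torch_xla_available():
        import torch_xla.core.xla_model as xm

        XLA_AVAILABLE = True
    else:
        XLA_AVAILABLE = False


    def denoise(unet_step, latents, timesteps):
        # Hypothetical loop showing where xm.mark_step() lands: on XLA devices
        # (e.g. TPU) it cuts the lazily built graph once per denoising step, so
        # each step compiles and dispatches incrementally instead of
        # accumulating into one giant graph that only executes at the end.
        for t in timesteps:
            latents = unet_step(latents, t)
            if XLA_AVAILABLE:
                xm.mark_step()
        return latents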

diffusers/pipelines/shap_e/pipeline_shap_e.py

@@ -25,6 +25,7 @@ from ...models import PriorTransformer
 from ...schedulers import HeunDiscreteScheduler
 from ...utils import (
     BaseOutput,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
@@ -33,8 +34,16 @@ from ..pipeline_utils import DiffusionPipeline
 from .renderer import ShapERenderer
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 EXAMPLE_DOC_STRING = """
 Examples:
     ```py
@@ -291,6 +300,9 @@ class ShapEPipeline(DiffusionPipeline):
                     sample=latents,
                 ).prev_sample
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         # Offload all models
         self.maybe_free_model_hooks()
 

diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py

@@ -24,6 +24,7 @@ from ...models import PriorTransformer
 from ...schedulers import HeunDiscreteScheduler
 from ...utils import (
     BaseOutput,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
@@ -32,8 +33,16 @@ from ..pipeline_utils import DiffusionPipeline
 from .renderer import ShapERenderer
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 EXAMPLE_DOC_STRING = """
 Examples:
     ```py
@@ -278,6 +287,9 @@ class ShapEImg2ImgPipeline(DiffusionPipeline):
                     sample=latents,
                 ).prev_sample
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if output_type not in ["np", "pil", "latent", "mesh"]:
             raise ValueError(
                 f"Only the output types `pil`, `np`, `latent` and `mesh` are supported not output_type={output_type}"

diffusers/pipelines/shap_e/renderer.py

@@ -983,9 +983,9 @@ class ShapERenderer(ModelMixin, ConfigMixin):
         fields = torch.cat(fields, dim=1)
         fields = fields.float()
 
-        assert (
-            len(fields.shape) == 3 and fields.shape[-1] == 1
-        ), f"expected [meta_batch x inner_batch] SDF results, but got {fields.shape}"
+        assert len(fields.shape) == 3 and fields.shape[-1] == 1, (
+            f"expected [meta_batch x inner_batch] SDF results, but got {fields.shape}"
+        )
 
         fields = fields.reshape(1, *([grid_size] * 3))
 
@@ -1039,9 +1039,9 @@ class ShapERenderer(ModelMixin, ConfigMixin):
         textures = textures.float()
 
         # 3.3 augument the mesh with texture data
-        assert len(textures.shape) == 3 and textures.shape[-1] == len(
-            texture_channels
-        ), f"expected [meta_batch x inner_batch x texture_channels] field results, but got {textures.shape}"
+        assert len(textures.shape) == 3 and textures.shape[-1] == len(texture_channels), (
+            f"expected [meta_batch x inner_batch x texture_channels] field results, but got {textures.shape}"
+        )
 
         for m, texture in zip(raw_meshes, textures):
             texture = texture[: len(m.verts)]

diffusers/pipelines/stable_audio/pipeline_stable_audio.py

@@ -584,7 +584,7 @@ class StableAudioPipeline(DiffusionPipeline):
 
         if audio_end_in_s - audio_start_in_s > max_audio_length_in_s:
             raise ValueError(
-                f"The total audio length requested ({audio_end_in_s-audio_start_in_s}s) is longer than the model maximum possible length ({max_audio_length_in_s}). Make sure that 'audio_end_in_s-audio_start_in_s<={max_audio_length_in_s}'."
+                f"The total audio length requested ({audio_end_in_s - audio_start_in_s}s) is longer than the model maximum possible length ({max_audio_length_in_s}). Make sure that 'audio_end_in_s-audio_start_in_s<={max_audio_length_in_s}'."
             )
 
         waveform_start = int(audio_start_in_s * self.vae.config.sampling_rate)

diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py

@@ -15,18 +15,26 @@
 from typing import Callable, Dict, List, Optional, Union
 
 import torch
-from transformers import CLIPTextModel, CLIPTokenizer
+from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 
 from ...models import StableCascadeUNet
 from ...schedulers import DDPMWuerstchenScheduler
-from ...utils import is_torch_version, logging, replace_example_docstring
+from ...utils import is_torch_version, is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from ..wuerstchen.modeling_paella_vq_model import PaellaVQModel
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 EXAMPLE_DOC_STRING = """
 Examples:
     ```py
@@ -57,7 +65,7 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
     Args:
         tokenizer (`CLIPTokenizer`):
             The CLIP tokenizer.
-        text_encoder (`CLIPTextModel`):
+        text_encoder (`CLIPTextModelWithProjection`):
             The CLIP text encoder.
         decoder ([`StableCascadeUNet`]):
             The Stable Cascade decoder unet.
@@ -85,7 +93,7 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
         self,
         decoder: StableCascadeUNet,
         tokenizer: CLIPTokenizer,
-        text_encoder: CLIPTextModel,
+        text_encoder: CLIPTextModelWithProjection,
         scheduler: DDPMWuerstchenScheduler,
         vqgan: PaellaVQModel,
         latent_dim_scale: float = 10.67,
@@ -503,6 +511,9 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
                 prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
                 negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if output_type not in ["pt", "np", "pil", "latent"]:
             raise ValueError(
                 f"Only the output types `pt`, `np`, `pil` and `latent` are supported not output_type={output_type}"

diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py

@@ -15,7 +15,7 @@ from typing import Callable, Dict, List, Optional, Union
 
 import PIL
 import torch
-from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
+from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer, CLIPVisionModelWithProjection
 
 from ...models import StableCascadeUNet
 from ...schedulers import DDPMWuerstchenScheduler
@@ -52,7 +52,7 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
     Args:
         tokenizer (`CLIPTokenizer`):
             The decoder tokenizer to be used for text inputs.
-        text_encoder (`CLIPTextModel`):
+        text_encoder (`CLIPTextModelWithProjection`):
             The decoder text encoder to be used for text inputs.
         decoder (`StableCascadeUNet`):
             The decoder model to be used for decoder image generation pipeline.
@@ -60,14 +60,18 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
             The scheduler to be used for decoder image generation pipeline.
         vqgan (`PaellaVQModel`):
             The VQGAN model to be used for decoder image generation pipeline.
-        feature_extractor ([`~transformers.CLIPImageProcessor`]):
-            Model that extracts features from generated images to be used as inputs for the `image_encoder`.
-        image_encoder ([`CLIPVisionModelWithProjection`]):
-            Frozen CLIP image-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)).
         prior_prior (`StableCascadeUNet`):
             The prior model to be used for prior pipeline.
+        prior_text_encoder (`CLIPTextModelWithProjection`):
+            The prior text encoder to be used for text inputs.
+        prior_tokenizer (`CLIPTokenizer`):
+            The prior tokenizer to be used for text inputs.
         prior_scheduler (`DDPMWuerstchenScheduler`):
             The scheduler to be used for prior pipeline.
+        prior_feature_extractor ([`~transformers.CLIPImageProcessor`]):
+            Model that extracts features from generated images to be used as inputs for the `image_encoder`.
+        prior_image_encoder ([`CLIPVisionModelWithProjection`]):
+            Frozen CLIP image-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)).
     """
 
     _load_connected_pipes = True
@@ -76,12 +80,12 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
     def __init__(
         self,
         tokenizer: CLIPTokenizer,
-        text_encoder: CLIPTextModel,
+        text_encoder: CLIPTextModelWithProjection,
         decoder: StableCascadeUNet,
         scheduler: DDPMWuerstchenScheduler,
         vqgan: PaellaVQModel,
         prior_prior: StableCascadeUNet,
-        prior_text_encoder: CLIPTextModel,
+        prior_text_encoder: CLIPTextModelWithProjection,
         prior_tokenizer: CLIPTokenizer,
         prior_scheduler: DDPMWuerstchenScheduler,
         prior_feature_extractor: Optional[CLIPImageProcessor] = None,

diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py

@@ -23,13 +23,21 @@ from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTo
 
 from ...models import StableCascadeUNet
 from ...schedulers import DDPMWuerstchenScheduler
-from ...utils import BaseOutput, logging, replace_example_docstring
+from ...utils import BaseOutput, is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 DEFAULT_STAGE_C_TIMESTEPS = list(np.linspace(1.0, 2 / 3, 20)) + list(np.linspace(2 / 3, 0.0, 11))[1:]
 
 EXAMPLE_DOC_STRING = """
@@ -611,6 +619,9 @@ class StableCascadePriorPipeline(DiffusionPipeline):
                 prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
                 negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         # Offload all models
         self.maybe_free_model_hooks()
 

diffusers/pipelines/stable_diffusion/convert_from_ckpt.py

@@ -1,5 +1,5 @@
 # coding=utf-8
-# Copyright 2024 The HuggingFace Inc. team.
+# Copyright 2025 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -52,6 +52,7 @@ from ...schedulers import (
     UnCLIPScheduler,
 )
 from ...utils import is_accelerate_available, logging
+from ...utils.constants import DIFFUSERS_REQUEST_TIMEOUT
 from ..latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
 from ..paint_by_example import PaintByExampleImageEncoder
 from ..pipeline_utils import DiffusionPipeline
@@ -1324,7 +1325,7 @@ def download_from_original_stable_diffusion_ckpt(
         config_url = "https://raw.githubusercontent.com/Stability-AI/stablediffusion/main/configs/stable-diffusion/x4-upscaling.yaml"
 
     if config_url is not None:
-        original_config_file = BytesIO(requests.get(config_url).content)
+        original_config_file = BytesIO(requests.get(config_url, timeout=DIFFUSERS_REQUEST_TIMEOUT).content)
     else:
         with open(original_config_file, "r") as f:
             original_config_file = f.read()
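The `requests.get` fix above pairs with the new `DIFFUSERS_REQUEST_TIMEOUT` constant (see diffusers/utils/constants.py, +13 in the list). A bare `requests.get()` can block indefinitely on a stalled connection; passing a timeout makes it fail fast instead. A self-contained sketch of the pattern — `fetch_config` is a hypothetical helper, and the 60-second value is an assumption (the real value lives in constants.py):

    from io import BytesIO

    import requests

    DIFFUSERS_REQUEST_TIMEOUT = 60  # assumed value; actually defined in diffusers/utils/constants.py


    def fetch_config(config_url: str) -> BytesIO:
        # Mirrors the fixed call in convert_from_ckpt.py: raise
        # requests.exceptions.Timeout instead of hanging forever.
        response = requests.get(config_url, timeout=DIFFUSERS_REQUEST_TIMEOUT)
        response.raise_for_status()
        return BytesIO(response.content)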

diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py

@@ -55,7 +55,7 @@ EXAMPLE_DOC_STRING = """
         >>> from diffusers import FlaxStableDiffusionPipeline
 
         >>> pipeline, params = FlaxStableDiffusionPipeline.from_pretrained(
-        ...     "runwayml/stable-diffusion-v1-5", variant="bf16", dtype=jax.numpy.bfloat16
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", variant="bf16", dtype=jax.numpy.bfloat16
         ... )
 
         >>> prompt = "a photo of an astronaut riding a horse on mars"
@@ -100,8 +100,8 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
             [`FlaxDPMSolverMultistepScheduler`].
         safety_checker ([`FlaxStableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -132,17 +132,21 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
                 " information, please have a look at https://github.com/huggingface/diffusers/pull/254 ."
             )
 
-        is_unet_version_less_0_9_0 = hasattr(unet.config, "_diffusers_version") and version.parse(
-            version.parse(unet.config._diffusers_version).base_version
-        ) < version.parse("0.9.0.dev0")
-        is_unet_sample_size_less_64 = hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        is_unet_version_less_0_9_0 = (
+            unet is not None
+            and hasattr(unet.config, "_diffusers_version")
+            and version.parse(version.parse(unet.config._diffusers_version).base_version) < version.parse("0.9.0.dev0")
+        )
+        is_unet_sample_size_less_64 = (
+            unet is not None and hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        )
         if is_unet_version_less_0_9_0 and is_unet_sample_size_less_64:
             deprecation_message = (
                 "The configuration file of the unet has set the default `sample_size` to smaller than"
                 " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the"
                 " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-"
-                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5"
-                " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
+                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5"
+                " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
                 " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`"
                 " in the config might lead to incorrect results in future versions. If you have downloaded this"
                 " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for"
@@ -162,7 +166,7 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
             safety_checker=safety_checker,
             feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
 
     def prepare_inputs(self, prompt: Union[str, List[str]]):
         if not isinstance(prompt, (str, list)):

diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py

@@ -124,8 +124,8 @@ class FlaxStableDiffusionImg2ImgPipeline(FlaxDiffusionPipeline):
             [`FlaxDPMSolverMultistepScheduler`].
         safety_checker ([`FlaxStableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -165,7 +165,7 @@ class FlaxStableDiffusionImg2ImgPipeline(FlaxDiffusionPipeline):
             safety_checker=safety_checker,
             feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
 
     def prepare_inputs(self, prompt: Union[str, List[str]], image: Union[Image.Image, List[Image.Image]]):
         if not isinstance(prompt, (str, list)):

diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py

@@ -127,8 +127,8 @@ class FlaxStableDiffusionInpaintPipeline(FlaxDiffusionPipeline):
             [`FlaxDPMSolverMultistepScheduler`].
         safety_checker ([`FlaxStableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -159,17 +159,21 @@ class FlaxStableDiffusionInpaintPipeline(FlaxDiffusionPipeline):
                 " information, please have a look at https://github.com/huggingface/diffusers/pull/254 ."
             )
 
-        is_unet_version_less_0_9_0 = hasattr(unet.config, "_diffusers_version") and version.parse(
-            version.parse(unet.config._diffusers_version).base_version
-        ) < version.parse("0.9.0.dev0")
-        is_unet_sample_size_less_64 = hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        is_unet_version_less_0_9_0 = (
+            unet is not None
+            and hasattr(unet.config, "_diffusers_version")
+            and version.parse(version.parse(unet.config._diffusers_version).base_version) < version.parse("0.9.0.dev0")
+        )
+        is_unet_sample_size_less_64 = (
+            unet is not None and hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        )
         if is_unet_version_less_0_9_0 and is_unet_sample_size_less_64:
             deprecation_message = (
                 "The configuration file of the unet has set the default `sample_size` to smaller than"
                 " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the"
                 " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-"
-                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5"
-                " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
+                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5"
+                " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
                 " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`"
                 " in the config might lead to incorrect results in future versions. If you have downloaded this"
                 " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for"
@@ -189,7 +193,7 @@ class FlaxStableDiffusionInpaintPipeline(FlaxDiffusionPipeline):
             safety_checker=safety_checker,
             feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
 
     def prepare_inputs(
         self,
@@ -331,7 +335,7 @@ class FlaxStableDiffusionInpaintPipeline(FlaxDiffusionPipeline):
                 f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
                 f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                 f" `num_channels_mask`: {num_channels_mask} + `num_channels_masked_image`: {num_channels_masked_image}"
-                f" = {num_channels_latents+num_channels_masked_image+num_channels_mask}. Please verify the config of"
+                f" = {num_channels_latents + num_channels_masked_image + num_channels_mask}. Please verify the config of"
                 " `pipeline.unet` or your `mask_image` or `image` input."
             )
 

diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py

@@ -57,7 +57,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
     ):
         super().__init__()
 
-        if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
+        if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
                 f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "
@@ -71,7 +71,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
             new_config["steps_offset"] = 1
             scheduler._internal_dict = FrozenDict(new_config)
 
-        if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True:
+        if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`."
                 " `clip_sample` should be set to False in the configuration file. Please make sure to update the"

diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py

@@ -78,7 +78,8 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details.
+            Please, refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            details.
         feature_extractor ([`CLIPImageProcessor`]):
             Model that extracts features from generated images to be used as inputs for the `safety_checker`.
     """
@@ -109,7 +110,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
     ):
         super().__init__()
 
-        if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
+        if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
                 f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "
@@ -123,7 +124,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
             new_config["steps_offset"] = 1
             scheduler._internal_dict = FrozenDict(new_config)
 
-        if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True:
+        if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`."
                 " `clip_sample` should be set to False in the configuration file. Please make sure to update the"

diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py

@@ -76,7 +76,8 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details.
+            Please, refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            details.
         feature_extractor ([`CLIPImageProcessor`]):
             Model that extracts features from generated images to be used as inputs for the `safety_checker`.
     """
@@ -108,7 +109,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
         super().__init__()
         logger.info("`OnnxStableDiffusionInpaintPipeline` is experimental and will very likely change in the future.")
 
-        if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
+        if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
                 f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "
@@ -122,7 +123,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
             new_config["steps_offset"] = 1
             scheduler._internal_dict = FrozenDict(new_config)
 
-        if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True:
+        if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`."
                 " `clip_sample` should be set to False in the configuration file. Please make sure to update the"
@@ -474,7 +475,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
                 "Incorrect configuration settings! The config of `pipeline.unet` expects"
                 f" {unet_input_channels} but received `num_channels_latents`: {num_channels_latents} +"
                 f" `num_channels_mask`: {num_channels_mask} + `num_channels_masked_image`: {num_channels_masked_image}"
-                f" = {num_channels_latents+num_channels_masked_image+num_channels_mask}. Please verify the config of"
+                f" = {num_channels_latents + num_channels_masked_image + num_channels_mask}. Please verify the config of"
                 " `pipeline.unet` or your `mask_image` or `image` input."
             )
 

diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py

@@ -83,7 +83,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
     ):
         super().__init__()
 
-        if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
+        if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
                 f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "
@@ -97,7 +97,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
             new_config["steps_offset"] = 1
             scheduler._internal_dict = FrozenDict(new_config)
 
-        if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True:
+        if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`."
                 " `clip_sample` should be set to False in the configuration file. Please make sure to update the"

diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py

@@ -55,7 +55,9 @@ EXAMPLE_DOC_STRING = """
         >>> import torch
         >>> from diffusers import StableDiffusionPipeline
 
-        >>> pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
+        >>> pipe = StableDiffusionPipeline.from_pretrained(
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
+        ... )
         >>> pipe = pipe.to("cuda")
 
         >>> prompt = "a photo of an astronaut riding a horse on mars"
@@ -184,8 +186,8 @@ class StableDiffusionPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -209,7 +211,7 @@ class StableDiffusionPipeline(
     ):
         super().__init__()
 
-        if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
+        if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
                 f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "
@@ -223,7 +225,7 @@ class StableDiffusionPipeline(
             new_config["steps_offset"] = 1
             scheduler._internal_dict = FrozenDict(new_config)
 
-        if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True:
+        if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`."
                 " `clip_sample` should be set to False in the configuration file. Please make sure to update the"
@@ -252,12 +254,15 @@ class StableDiffusionPipeline(
                 " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead."
             )
 
-        is_unet_version_less_0_9_0 = hasattr(unet.config, "_diffusers_version") and version.parse(
-            version.parse(unet.config._diffusers_version).base_version
-        ) < version.parse("0.9.0.dev0")
-        self._is_unet_config_sample_size_int = isinstance(unet.config.sample_size, int)
+        is_unet_version_less_0_9_0 = (
+            unet is not None
+            and hasattr(unet.config, "_diffusers_version")
+            and version.parse(version.parse(unet.config._diffusers_version).base_version) < version.parse("0.9.0.dev0")
+        )
+        self._is_unet_config_sample_size_int = unet is not None and isinstance(unet.config.sample_size, int)
         is_unet_sample_size_less_64 = (
-            hasattr(unet.config, "sample_size")
+            unet is not None
+            and hasattr(unet.config, "sample_size")
             and self._is_unet_config_sample_size_int
             and unet.config.sample_size < 64
         )
@@ -266,8 +271,8 @@ class StableDiffusionPipeline(
                 "The configuration file of the unet has set the default `sample_size` to smaller than"
                 " 64 which seems highly unlikely. If your checkpoint is a fine-tuned version of any of the"
                 " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-"
-                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5"
-                " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
+                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5"
+                " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
                 " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`"
                 " in the config might lead to incorrect results in future versions. If you have downloaded this"
                 " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for"
@@ -288,7 +293,7 @@ class StableDiffusionPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)
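The `vae_scale_factor` fallback that closes this section is the same None-tolerance applied to the VAE: a pipeline constructed with `vae=None` (which 0.33 now tolerates, presumably to support partially loaded pipelines and the remote VAE decoding added in diffusers/utils/remote_utils.py) falls back to the SD-family default of 8. Schematically:

    # Schematic restatement of the new fallback; compute_vae_scale_factor is a
    # hypothetical helper, not a diffusers function.
    def compute_vae_scale_factor(vae) -> int:
        if vae is not None:
            # Each VAE down block except the last halves the resolution.
            return 2 ** (len(vae.config.block_out_channels) - 1)
        return 8  # SD-family default: a 512px image maps to a 64px latent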