diffusers 0.32.2-py3-none-any.whl → 0.33.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +121 -86
  13. diffusers/loaders/lora_conversion_utils.py +504 -44
  14. diffusers/loaders/lora_pipeline.py +1769 -181
  15. diffusers/loaders/peft.py +167 -57
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +646 -72
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +20 -7
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +593 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +9 -1
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +2 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
  384. diffusers-0.33.0.dist-info/RECORD +608 -0
  385. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.2.dist-info/RECORD +0 -550
  387. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
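The headline additions in 0.33.0 are the new diffusers/hooks modules: group offloading, layerwise casting, pyramid attention broadcast, and FasterCache. Below is a minimal sketch of the two memory-oriented hooks, assuming the entry points `apply_group_offloading` and `ModelMixin.enable_layerwise_casting` inferred from the file names above; treat the exact signatures as illustrative rather than authoritative.

```python
# Sketch only: entry points and arguments inferred from diffusers/hooks/group_offloading.py
# and diffusers/hooks/layerwise_casting.py; details may differ from the released API.
import torch
from diffusers import UNet2DConditionModel
from diffusers.hooks import apply_group_offloading

unet = UNet2DConditionModel.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet", torch_dtype=torch.bfloat16
)

# Option 1: keep only small groups of blocks on the GPU, onloading each group
# from CPU right before it executes.
apply_group_offloading(
    unet,
    onload_device=torch.device("cuda"),
    offload_device=torch.device("cpu"),
    offload_type="block_level",
    num_blocks_per_group=2,
)

# Option 2 (an alternative to option 1): store weights in FP8 and upcast to
# bfloat16 only while a layer is actually computing.
unet.enable_layerwise_casting(storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16)
```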
diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py

@@ -170,10 +170,14 @@ class StableDiffusionXLKDiffusionPipeline(
             scheduler=scheduler,
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

-        self.default_sample_size = self.unet.config.sample_size
+        self.default_sample_size = (
+            self.unet.config.sample_size
+            if hasattr(self, "unet") and self.unet is not None and hasattr(self.unet.config, "sample_size")
+            else 128
+        )

         model = ModelWrapper(unet, scheduler.alphas_cumprod)
         if scheduler.config.prediction_type == "v_prediction":

@@ -321,7 +325,9 @@ class StableDiffusionXLKDiffusionPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)

                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds = prompt_embeds[0]
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:

@@ -380,8 +386,10 @@ class StableDiffusionXLKDiffusionPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

                 negative_prompt_embeds_list.append(negative_prompt_embeds)
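Two patterns recur throughout these pipeline hunks. First, constructor lookups become None-tolerant so a pipeline can be instantiated without some components (`vae=None`, `unet=None`); the fallbacks, 8 for `vae_scale_factor` and 128 for `default_sample_size`, match the stock SD/SDXL configurations. Second, `prompt_embeds[0]` is treated as a pooled embedding only when it is 2-D and the caller did not already supply one, instead of being overwritten unconditionally. A standalone sketch of both guards; `FakeVae` and the tensor shapes are illustrative stand-ins, not diffusers API:

```python
import torch

class FakeVae:  # stand-in with the usual 4-entry block_out_channels
    class config:
        block_out_channels = [128, 256, 512, 512]

def scale_factor(vae):
    # Old: 2 ** (len(vae.config.block_out_channels) - 1) -> AttributeError when vae is None
    return 2 ** (len(vae.config.block_out_channels) - 1) if vae is not None else 8

assert scale_factor(FakeVae()) == 8  # 2 ** 3 for the standard VAE
assert scale_factor(None) == 8       # fallback matches the standard value

# Pooled-embedding guard: only a 2-D (batch, dim) tensor counts as a pooled
# output (e.g. from CLIPTextModelWithProjection), and a caller-provided value wins.
pooled_prompt_embeds = None
encoder_output = (torch.zeros(1, 1280),)  # first element is 2-D -> pooled
if pooled_prompt_embeds is None and encoder_output[0].ndim == 2:
    pooled_prompt_embeds = encoder_output[0]
assert pooled_prompt_embeds is not None
```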
diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py

@@ -30,6 +30,7 @@ from ...utils import (
     USE_PEFT_BACKEND,
     BaseOutput,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,

@@ -40,8 +41,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
 Examples:
     ```python

@@ -203,8 +212,8 @@ class StableDiffusionLDM3DPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """

@@ -254,7 +263,7 @@ class StableDiffusionLDM3DPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessorLDM3D(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)


@@ -1002,6 +1011,9 @@ class StableDiffusionLDM3DPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
             image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
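The `is_torch_xla_available()` import and the `xm.mark_step()` call added above land in most denoising loops in this release. Under torch_xla, tensor operations are traced lazily; `mark_step()` cuts the graph and dispatches it once per scheduler step instead of letting it grow across the whole loop. The idiom in isolation, shown with a plain try/except in place of diffusers' `is_torch_xla_available()` helper and a stand-in `step()` function:

```python
# Optional dependency guard: everything degrades to a no-op without torch_xla.
try:
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
except ImportError:
    XLA_AVAILABLE = False

def step(latents, t):
    return latents  # stand-in for one unet forward + scheduler step

latents = 0.0
for t in range(3):  # schematic denoising loop
    latents = step(latents, t)
    if XLA_AVAILABLE:
        xm.mark_step()  # flush the lazily-traced graph after each step
```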
diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py

@@ -26,6 +26,7 @@ from ...schedulers import DDIMScheduler
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,

@@ -37,8 +38,16 @@ from ..stable_diffusion import StableDiffusionPipelineOutput
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
 Examples:
     ```py

@@ -179,8 +188,8 @@ class StableDiffusionPanoramaPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """

@@ -230,7 +239,7 @@ class StableDiffusionPanoramaPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)


@@ -1155,6 +1164,9 @@ class StableDiffusionPanoramaPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if output_type != "latent":
             if circular_padding:
                 image = self.decode_latents_with_padding(latents)
diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py

@@ -12,13 +12,20 @@ from ...image_processor import PipelineImageInput
 from ...loaders import IPAdapterMixin
 from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging
+from ...utils import deprecate, is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import StableDiffusionSafePipelineOutput
 from .safety_checker import SafeStableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -46,8 +53,8 @@ class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAda
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """

@@ -74,7 +81,7 @@ class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAda
             " abuse, brutality, cruelty"
         )

-        if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
+        if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
                 f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "

@@ -88,7 +95,7 @@ class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAda
             new_config["steps_offset"] = 1
             scheduler._internal_dict = FrozenDict(new_config)

-        if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True:
+        if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`."
                 " `clip_sample` should be set to False in the configuration file. Please make sure to update the"

@@ -117,17 +124,21 @@ class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAda
                 " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead."
             )

-        is_unet_version_less_0_9_0 = hasattr(unet.config, "_diffusers_version") and version.parse(
-            version.parse(unet.config._diffusers_version).base_version
-        ) < version.parse("0.9.0.dev0")
-        is_unet_sample_size_less_64 = hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        is_unet_version_less_0_9_0 = (
+            unet is not None
+            and hasattr(unet.config, "_diffusers_version")
+            and version.parse(version.parse(unet.config._diffusers_version).base_version) < version.parse("0.9.0.dev0")
+        )
+        is_unet_sample_size_less_64 = (
+            unet is not None and hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        )
         if is_unet_version_less_0_9_0 and is_unet_sample_size_less_64:
             deprecation_message = (
                 "The configuration file of the unet has set the default `sample_size` to smaller than"
                 " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the"
                 " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-"
-                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5"
-                " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
+                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5"
+                " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
                 " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`"
                 " in the config might lead to incorrect results in future versions. If you have downloaded this"
                 " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for"

@@ -149,7 +160,7 @@ class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAda
             image_encoder=image_encoder,
         )
         self._safety_text_concept = safety_concept
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.register_to_config(requires_safety_checker=requires_safety_checker)

     @property

@@ -739,6 +750,9 @@ class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAda
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         # 8. Post-processing
         image = self.decode_latents(latents)

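The scheduler checks in the safe pipeline move in the same defensive direction: `scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1` short-circuits when no scheduler is passed and supplies a sane default when the config key is missing. A compressed before/after, with `SimpleNamespace` standing in for the config object:

```python
from types import SimpleNamespace

scheduler = None
# Old form: hasattr(scheduler.config, "steps_offset") raises AttributeError here.
# New form short-circuits on None and defaults the missing key to 1:
assert not (scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1)

scheduler = SimpleNamespace(config=SimpleNamespace(steps_offset=0))
assert scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1
```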
diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py

@@ -27,6 +27,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,

@@ -38,8 +39,16 @@ from ..stable_diffusion import StableDiffusionPipelineOutput
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
 Examples:
     ```py

@@ -47,7 +56,7 @@ EXAMPLE_DOC_STRING = """
         >>> from diffusers import StableDiffusionSAGPipeline

         >>> pipe = StableDiffusionSAGPipeline.from_pretrained(
-        ...     "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
         ... )
         >>> pipe = pipe.to("cuda")

@@ -123,8 +132,8 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """

@@ -157,7 +166,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)

@@ -840,6 +849,9 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
             image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py

@@ -65,7 +65,7 @@ class FlaxStableDiffusionXLPipeline(FlaxDiffusionPipeline):
             unet=unet,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8

     def prepare_inputs(self, prompt: Union[str, List[str]]):
         if not isinstance(prompt, (str, list)):
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py

@@ -269,10 +269,14 @@ class StableDiffusionXLPipeline(
             feature_extractor=feature_extractor,
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

-        self.default_sample_size = self.unet.config.sample_size
+        self.default_sample_size = (
+            self.unet.config.sample_size
+            if hasattr(self, "unet") and self.unet is not None and hasattr(self.unet.config, "sample_size")
+            else 128
+        )

         add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available()


@@ -406,7 +410,9 @@ class StableDiffusionXLPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)

                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds = prompt_embeds[0]
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:

@@ -465,8 +471,10 @@ class StableDiffusionXLPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

                 negative_prompt_embeds_list.append(negative_prompt_embeds)
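Taken together, the constructor guards let an SDXL pipeline be assembled with components deliberately omitted, which is useful for partial loading and for swapping a VAE in later. A hedged sketch; passing `vae=None` through `from_pretrained` follows the same pattern long used for `safety_checker=None`:

```python
import torch
from diffusers import StableDiffusionXLPipeline

# With the guards above, skipping the VAE no longer crashes in __init__;
# vae_scale_factor falls back to the SDXL default of 8.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", vae=None, torch_dtype=torch.float16
)
assert pipe.vae_scale_factor == 8
```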
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py

@@ -291,7 +291,7 @@ class StableDiffusionXLImg2ImgPipeline(
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
         self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

         add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available()

@@ -427,7 +427,9 @@ class StableDiffusionXLImg2ImgPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)

                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds = prompt_embeds[0]
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:

@@ -486,8 +488,10 @@ class StableDiffusionXLImg2ImgPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

                 negative_prompt_embeds_list.append(negative_prompt_embeds)
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py

@@ -321,7 +321,7 @@ class StableDiffusionXLInpaintPipeline(
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
         self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.mask_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True

@@ -531,7 +531,9 @@ class StableDiffusionXLInpaintPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)

                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds = prompt_embeds[0]
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:

@@ -590,8 +592,10 @@ class StableDiffusionXLInpaintPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

                 negative_prompt_embeds_list.append(negative_prompt_embeds)

@@ -737,7 +741,7 @@ class StableDiffusionXLInpaintPipeline(
         if padding_mask_crop is not None:
             if not isinstance(image, PIL.Image.Image):
                 raise ValueError(
-                    f"The image should be a PIL image when inpainting mask crop, but is of type" f" {type(image)}."
+                    f"The image should be a PIL image when inpainting mask crop, but is of type {type(image)}."
                 )
             if not isinstance(mask_image, PIL.Image.Image):
                 raise ValueError(

@@ -745,7 +749,7 @@ class StableDiffusionXLInpaintPipeline(
                     f" {type(mask_image)}."
                 )
             if output_type != "pil":
-                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is" f" {output_type}.")
+                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is {output_type}.")

         if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
             raise ValueError(

@@ -1505,7 +1509,7 @@ class StableDiffusionXLInpaintPipeline(
                     f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
                    f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                    f" `num_channels_mask`: {num_channels_mask} + `num_channels_masked_image`: {num_channels_masked_image}"
-                    f" = {num_channels_latents+num_channels_masked_image+num_channels_mask}. Please verify the config of"
+                    f" = {num_channels_latents + num_channels_masked_image + num_channels_mask}. Please verify the config of"
                     " `pipeline.unet` or your `mask_image` or `image` input."
                 )
             elif num_channels_unet != 4:
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py

@@ -199,9 +199,13 @@ class StableDiffusionXLInstructPix2PixPipeline(
             scheduler=scheduler,
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
-        self.default_sample_size = self.unet.config.sample_size
+        self.default_sample_size = (
+            self.unet.config.sample_size
+            if hasattr(self, "unet") and self.unet is not None and hasattr(self.unet.config, "sample_size")
+            else 128
+        )
         self.is_cosxl_edit = is_cosxl_edit

         add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available()

@@ -333,7 +337,9 @@ class StableDiffusionXLInstructPix2PixPipeline(
                 )

                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds = prompt_embeds[0]
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 prompt_embeds = prompt_embeds.hidden_states[-2]

                 prompt_embeds_list.append(prompt_embeds)

@@ -385,7 +391,8 @@ class StableDiffusionXLInstructPix2PixPipeline(
                     output_hidden_states=True,
                 )
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

                 negative_prompt_embeds_list.append(negative_prompt_embeds)
diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py

@@ -24,14 +24,22 @@ from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
 from ...image_processor import PipelineImageInput
 from ...models import AutoencoderKLTemporalDecoder, UNetSpatioTemporalConditionModel
 from ...schedulers import EulerDiscreteScheduler
-from ...utils import BaseOutput, logging, replace_example_docstring
+from ...utils import BaseOutput, is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import is_compiled_module, randn_tensor
 from ...video_processor import VideoProcessor
 from ..pipeline_utils import DiffusionPipeline


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
 Examples:
     ```py

@@ -177,7 +185,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
             scheduler=scheduler,
             feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(do_resize=True, vae_scale_factor=self.vae_scale_factor)

     def _encode_image(

@@ -600,6 +608,9 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             # cast back to fp16 if needed
             if needs_upcasting:
diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py

@@ -22,7 +22,7 @@ import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer

 from ...image_processor import VaeImageProcessor
-from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, MultiAdapter, T2IAdapter, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers

@@ -31,6 +31,7 @@ from ...utils import (
     USE_PEFT_BACKEND,
     BaseOutput,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,

@@ -41,6 +42,14 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
+
 @dataclass
 class StableDiffusionAdapterPipelineOutput(BaseOutput):
     """

@@ -59,6 +68,7 @@ class StableDiffusionAdapterPipelineOutput(BaseOutput):

 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
 Examples:
     ```py

@@ -178,7 +188,7 @@ def retrieve_timesteps(
     return timesteps, num_inference_steps


-class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin):
+class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin, FromSingleFileMixin):
     r"""
     Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
     https://arxiv.org/abs/2302.08453

@@ -208,7 +218,8 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin):
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details.
+            Please, refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            details.
         feature_extractor ([`CLIPImageProcessor`]):
             Model that extracts features from generated images to be used as inputs for the `safety_checker`.
     """

@@ -259,7 +270,7 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin):
             safety_checker=safety_checker,
             feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)

@@ -914,6 +925,9 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin):
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if output_type == "latent":
             image = latents
             has_nsfw_concept = None
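Adding `FromSingleFileMixin` to `StableDiffusionAdapterPipeline` means the T2I-Adapter pipeline can now be built from a monolithic `.safetensors` checkpoint via `from_single_file`, mirroring the existing ControlNet pattern. A sketch; the checkpoint path is a placeholder, and passing the adapter as a keyword follows the usual `from_single_file` component convention:

```python
import torch
from diffusers import StableDiffusionAdapterPipeline, T2IAdapter

adapter = T2IAdapter.from_pretrained(
    "TencentARC/t2iadapter_canny_sd15v2", torch_dtype=torch.float16
)
pipe = StableDiffusionAdapterPipeline.from_single_file(
    "path/to/stable-diffusion-v1-5.safetensors",  # placeholder checkpoint path
    adapter=adapter,
    torch_dtype=torch.float16,
)
```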