diffusers 0.27.1__py3-none-any.whl → 0.32.2__py3-none-any.whl

Files changed (445)
  1. diffusers/__init__.py +233 -6
  2. diffusers/callbacks.py +209 -0
  3. diffusers/commands/env.py +102 -6
  4. diffusers/configuration_utils.py +45 -16
  5. diffusers/dependency_versions_table.py +4 -3
  6. diffusers/image_processor.py +434 -110
  7. diffusers/loaders/__init__.py +42 -9
  8. diffusers/loaders/ip_adapter.py +626 -36
  9. diffusers/loaders/lora_base.py +900 -0
  10. diffusers/loaders/lora_conversion_utils.py +991 -125
  11. diffusers/loaders/lora_pipeline.py +3812 -0
  12. diffusers/loaders/peft.py +571 -7
  13. diffusers/loaders/single_file.py +405 -173
  14. diffusers/loaders/single_file_model.py +385 -0
  15. diffusers/loaders/single_file_utils.py +1783 -713
  16. diffusers/loaders/textual_inversion.py +41 -23
  17. diffusers/loaders/transformer_flux.py +181 -0
  18. diffusers/loaders/transformer_sd3.py +89 -0
  19. diffusers/loaders/unet.py +464 -540
  20. diffusers/loaders/unet_loader_utils.py +163 -0
  21. diffusers/models/__init__.py +76 -7
  22. diffusers/models/activations.py +65 -10
  23. diffusers/models/adapter.py +53 -53
  24. diffusers/models/attention.py +605 -18
  25. diffusers/models/attention_flax.py +1 -1
  26. diffusers/models/attention_processor.py +4304 -687
  27. diffusers/models/autoencoders/__init__.py +8 -0
  28. diffusers/models/autoencoders/autoencoder_asym_kl.py +15 -17
  29. diffusers/models/autoencoders/autoencoder_dc.py +620 -0
  30. diffusers/models/autoencoders/autoencoder_kl.py +110 -28
  31. diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
  32. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +1482 -0
  33. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
  34. diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
  35. diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
  36. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +19 -24
  37. diffusers/models/autoencoders/autoencoder_oobleck.py +464 -0
  38. diffusers/models/autoencoders/autoencoder_tiny.py +21 -18
  39. diffusers/models/autoencoders/consistency_decoder_vae.py +45 -20
  40. diffusers/models/autoencoders/vae.py +41 -29
  41. diffusers/models/autoencoders/vq_model.py +182 -0
  42. diffusers/models/controlnet.py +47 -800
  43. diffusers/models/controlnet_flux.py +70 -0
  44. diffusers/models/controlnet_sd3.py +68 -0
  45. diffusers/models/controlnet_sparsectrl.py +116 -0
  46. diffusers/models/controlnets/__init__.py +23 -0
  47. diffusers/models/controlnets/controlnet.py +872 -0
  48. diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +9 -9
  49. diffusers/models/controlnets/controlnet_flux.py +536 -0
  50. diffusers/models/controlnets/controlnet_hunyuan.py +401 -0
  51. diffusers/models/controlnets/controlnet_sd3.py +489 -0
  52. diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
  53. diffusers/models/controlnets/controlnet_union.py +832 -0
  54. diffusers/models/controlnets/controlnet_xs.py +1946 -0
  55. diffusers/models/controlnets/multicontrolnet.py +183 -0
  56. diffusers/models/downsampling.py +85 -18
  57. diffusers/models/embeddings.py +1856 -158
  58. diffusers/models/embeddings_flax.py +23 -9
  59. diffusers/models/model_loading_utils.py +480 -0
  60. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  61. diffusers/models/modeling_flax_utils.py +2 -7
  62. diffusers/models/modeling_outputs.py +14 -0
  63. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  64. diffusers/models/modeling_utils.py +611 -146
  65. diffusers/models/normalization.py +361 -20
  66. diffusers/models/resnet.py +18 -23
  67. diffusers/models/transformers/__init__.py +16 -0
  68. diffusers/models/transformers/auraflow_transformer_2d.py +544 -0
  69. diffusers/models/transformers/cogvideox_transformer_3d.py +542 -0
  70. diffusers/models/transformers/dit_transformer_2d.py +240 -0
  71. diffusers/models/transformers/dual_transformer_2d.py +9 -8
  72. diffusers/models/transformers/hunyuan_transformer_2d.py +578 -0
  73. diffusers/models/transformers/latte_transformer_3d.py +327 -0
  74. diffusers/models/transformers/lumina_nextdit2d.py +340 -0
  75. diffusers/models/transformers/pixart_transformer_2d.py +445 -0
  76. diffusers/models/transformers/prior_transformer.py +13 -13
  77. diffusers/models/transformers/sana_transformer.py +488 -0
  78. diffusers/models/transformers/stable_audio_transformer.py +458 -0
  79. diffusers/models/transformers/t5_film_transformer.py +17 -19
  80. diffusers/models/transformers/transformer_2d.py +297 -187
  81. diffusers/models/transformers/transformer_allegro.py +422 -0
  82. diffusers/models/transformers/transformer_cogview3plus.py +386 -0
  83. diffusers/models/transformers/transformer_flux.py +593 -0
  84. diffusers/models/transformers/transformer_hunyuan_video.py +791 -0
  85. diffusers/models/transformers/transformer_ltx.py +469 -0
  86. diffusers/models/transformers/transformer_mochi.py +499 -0
  87. diffusers/models/transformers/transformer_sd3.py +461 -0
  88. diffusers/models/transformers/transformer_temporal.py +21 -19
  89. diffusers/models/unets/unet_1d.py +8 -8
  90. diffusers/models/unets/unet_1d_blocks.py +31 -31
  91. diffusers/models/unets/unet_2d.py +17 -10
  92. diffusers/models/unets/unet_2d_blocks.py +225 -149
  93. diffusers/models/unets/unet_2d_condition.py +41 -40
  94. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  95. diffusers/models/unets/unet_3d_blocks.py +192 -1057
  96. diffusers/models/unets/unet_3d_condition.py +22 -27
  97. diffusers/models/unets/unet_i2vgen_xl.py +22 -18
  98. diffusers/models/unets/unet_kandinsky3.py +2 -2
  99. diffusers/models/unets/unet_motion_model.py +1413 -89
  100. diffusers/models/unets/unet_spatio_temporal_condition.py +40 -16
  101. diffusers/models/unets/unet_stable_cascade.py +19 -18
  102. diffusers/models/unets/uvit_2d.py +2 -2
  103. diffusers/models/upsampling.py +95 -26
  104. diffusers/models/vq_model.py +12 -164
  105. diffusers/optimization.py +1 -1
  106. diffusers/pipelines/__init__.py +202 -3
  107. diffusers/pipelines/allegro/__init__.py +48 -0
  108. diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
  109. diffusers/pipelines/allegro/pipeline_output.py +23 -0
  110. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  111. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  112. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  113. diffusers/pipelines/animatediff/__init__.py +8 -0
  114. diffusers/pipelines/animatediff/pipeline_animatediff.py +122 -109
  115. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +1106 -0
  116. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1288 -0
  117. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1010 -0
  118. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +236 -180
  119. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
  120. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  121. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  122. diffusers/pipelines/audioldm2/modeling_audioldm2.py +58 -39
  123. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +121 -36
  124. diffusers/pipelines/aura_flow/__init__.py +48 -0
  125. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +584 -0
  126. diffusers/pipelines/auto_pipeline.py +196 -28
  127. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  128. diffusers/pipelines/blip_diffusion/modeling_blip2.py +6 -6
  129. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  130. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  131. diffusers/pipelines/cogvideo/__init__.py +54 -0
  132. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +772 -0
  133. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +825 -0
  134. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +885 -0
  135. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +851 -0
  136. diffusers/pipelines/cogvideo/pipeline_output.py +20 -0
  137. diffusers/pipelines/cogview3/__init__.py +47 -0
  138. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
  139. diffusers/pipelines/cogview3/pipeline_output.py +21 -0
  140. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +6 -6
  141. diffusers/pipelines/controlnet/__init__.py +86 -80
  142. diffusers/pipelines/controlnet/multicontrolnet.py +7 -182
  143. diffusers/pipelines/controlnet/pipeline_controlnet.py +134 -87
  144. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  145. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +93 -77
  146. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +88 -197
  147. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +136 -90
  148. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +176 -80
  149. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +125 -89
  150. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
  151. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
  152. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
  153. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
  154. diffusers/pipelines/controlnet_hunyuandit/__init__.py +48 -0
  155. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +1060 -0
  156. diffusers/pipelines/controlnet_sd3/__init__.py +57 -0
  157. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +1133 -0
  158. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
  159. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  160. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +916 -0
  161. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1111 -0
  162. diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
  163. diffusers/pipelines/deepfloyd_if/pipeline_if.py +16 -30
  164. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +20 -35
  165. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +23 -41
  166. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +22 -38
  167. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +25 -41
  168. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +19 -34
  169. diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
  170. diffusers/pipelines/deepfloyd_if/watermark.py +1 -1
  171. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  172. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +70 -30
  173. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +48 -25
  174. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  175. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  176. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +21 -20
  177. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +27 -29
  178. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +33 -27
  179. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +33 -23
  180. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +36 -30
  181. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +102 -69
  182. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  183. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  184. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  185. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  186. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  187. diffusers/pipelines/dit/pipeline_dit.py +7 -4
  188. diffusers/pipelines/flux/__init__.py +69 -0
  189. diffusers/pipelines/flux/modeling_flux.py +47 -0
  190. diffusers/pipelines/flux/pipeline_flux.py +957 -0
  191. diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
  192. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
  193. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
  194. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1006 -0
  195. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +998 -0
  196. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1204 -0
  197. diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
  198. diffusers/pipelines/flux/pipeline_flux_img2img.py +856 -0
  199. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1022 -0
  200. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
  201. diffusers/pipelines/flux/pipeline_output.py +37 -0
  202. diffusers/pipelines/free_init_utils.py +41 -38
  203. diffusers/pipelines/free_noise_utils.py +596 -0
  204. diffusers/pipelines/hunyuan_video/__init__.py +48 -0
  205. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
  206. diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
  207. diffusers/pipelines/hunyuandit/__init__.py +48 -0
  208. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +916 -0
  209. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  210. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  211. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +32 -29
  212. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  213. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  214. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  215. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  216. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +34 -31
  217. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  218. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  219. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  220. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  221. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  222. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  223. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  224. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +22 -35
  225. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +26 -37
  226. diffusers/pipelines/kolors/__init__.py +54 -0
  227. diffusers/pipelines/kolors/pipeline_kolors.py +1070 -0
  228. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +1250 -0
  229. diffusers/pipelines/kolors/pipeline_output.py +21 -0
  230. diffusers/pipelines/kolors/text_encoder.py +889 -0
  231. diffusers/pipelines/kolors/tokenizer.py +338 -0
  232. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +82 -62
  233. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +77 -60
  234. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +12 -12
  235. diffusers/pipelines/latte/__init__.py +48 -0
  236. diffusers/pipelines/latte/pipeline_latte.py +881 -0
  237. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +80 -74
  238. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +85 -76
  239. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  240. diffusers/pipelines/ltx/__init__.py +50 -0
  241. diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
  242. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
  243. diffusers/pipelines/ltx/pipeline_output.py +20 -0
  244. diffusers/pipelines/lumina/__init__.py +48 -0
  245. diffusers/pipelines/lumina/pipeline_lumina.py +890 -0
  246. diffusers/pipelines/marigold/__init__.py +50 -0
  247. diffusers/pipelines/marigold/marigold_image_processing.py +576 -0
  248. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  249. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  250. diffusers/pipelines/mochi/__init__.py +48 -0
  251. diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
  252. diffusers/pipelines/mochi/pipeline_output.py +20 -0
  253. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  254. diffusers/pipelines/pag/__init__.py +80 -0
  255. diffusers/pipelines/pag/pag_utils.py +243 -0
  256. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1328 -0
  257. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1543 -0
  258. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1610 -0
  259. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1683 -0
  260. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +969 -0
  261. diffusers/pipelines/pag/pipeline_pag_kolors.py +1136 -0
  262. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +865 -0
  263. diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
  264. diffusers/pipelines/pag/pipeline_pag_sd.py +1062 -0
  265. diffusers/pipelines/pag/pipeline_pag_sd_3.py +994 -0
  266. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
  267. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +866 -0
  268. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1094 -0
  269. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
  270. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +1345 -0
  271. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +1544 -0
  272. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +1776 -0
  273. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  274. diffusers/pipelines/pia/pipeline_pia.py +74 -164
  275. diffusers/pipelines/pipeline_flax_utils.py +5 -10
  276. diffusers/pipelines/pipeline_loading_utils.py +515 -53
  277. diffusers/pipelines/pipeline_utils.py +411 -222
  278. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  279. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +76 -93
  280. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +873 -0
  281. diffusers/pipelines/sana/__init__.py +47 -0
  282. diffusers/pipelines/sana/pipeline_output.py +21 -0
  283. diffusers/pipelines/sana/pipeline_sana.py +884 -0
  284. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +27 -23
  285. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  286. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  287. diffusers/pipelines/shap_e/renderer.py +1 -1
  288. diffusers/pipelines/stable_audio/__init__.py +50 -0
  289. diffusers/pipelines/stable_audio/modeling_stable_audio.py +158 -0
  290. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +756 -0
  291. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +71 -25
  292. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  293. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +35 -34
  294. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  295. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +20 -11
  296. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  297. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  298. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  299. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +145 -79
  300. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +43 -28
  301. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  302. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +100 -68
  303. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +109 -201
  304. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +131 -32
  305. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +247 -87
  306. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +30 -29
  307. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +35 -27
  308. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +49 -42
  309. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  310. diffusers/pipelines/stable_diffusion_3/__init__.py +54 -0
  311. diffusers/pipelines/stable_diffusion_3/pipeline_output.py +21 -0
  312. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +1140 -0
  313. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +1036 -0
  314. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +1250 -0
  315. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +29 -20
  316. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +59 -58
  317. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +31 -25
  318. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +38 -22
  319. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -24
  320. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -23
  321. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +107 -67
  322. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +316 -69
  323. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  324. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  325. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +98 -30
  326. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +121 -83
  327. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +161 -105
  328. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +142 -218
  329. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -29
  330. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  331. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  332. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +69 -39
  333. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +105 -74
  334. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  335. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +29 -49
  336. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +32 -93
  337. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +37 -25
  338. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +54 -40
  339. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  340. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  341. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  342. diffusers/pipelines/unidiffuser/modeling_uvit.py +12 -12
  343. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +29 -28
  344. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  345. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  346. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +6 -8
  347. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  348. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  349. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +15 -14
  350. diffusers/{models/dual_transformer_2d.py → quantizers/__init__.py} +2 -6
  351. diffusers/quantizers/auto.py +139 -0
  352. diffusers/quantizers/base.py +233 -0
  353. diffusers/quantizers/bitsandbytes/__init__.py +2 -0
  354. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +561 -0
  355. diffusers/quantizers/bitsandbytes/utils.py +306 -0
  356. diffusers/quantizers/gguf/__init__.py +1 -0
  357. diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
  358. diffusers/quantizers/gguf/utils.py +456 -0
  359. diffusers/quantizers/quantization_config.py +669 -0
  360. diffusers/quantizers/torchao/__init__.py +15 -0
  361. diffusers/quantizers/torchao/torchao_quantizer.py +292 -0
  362. diffusers/schedulers/__init__.py +12 -2
  363. diffusers/schedulers/deprecated/__init__.py +1 -1
  364. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  365. diffusers/schedulers/scheduling_amused.py +5 -5
  366. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  367. diffusers/schedulers/scheduling_consistency_models.py +23 -25
  368. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +572 -0
  369. diffusers/schedulers/scheduling_ddim.py +27 -26
  370. diffusers/schedulers/scheduling_ddim_cogvideox.py +452 -0
  371. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  372. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  373. diffusers/schedulers/scheduling_ddim_parallel.py +32 -31
  374. diffusers/schedulers/scheduling_ddpm.py +27 -30
  375. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  376. diffusers/schedulers/scheduling_ddpm_parallel.py +33 -36
  377. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  378. diffusers/schedulers/scheduling_deis_multistep.py +150 -50
  379. diffusers/schedulers/scheduling_dpm_cogvideox.py +489 -0
  380. diffusers/schedulers/scheduling_dpmsolver_multistep.py +221 -84
  381. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  382. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +158 -52
  383. diffusers/schedulers/scheduling_dpmsolver_sde.py +153 -34
  384. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +275 -86
  385. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +81 -57
  386. diffusers/schedulers/scheduling_edm_euler.py +62 -39
  387. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +30 -29
  388. diffusers/schedulers/scheduling_euler_discrete.py +255 -74
  389. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +458 -0
  390. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +320 -0
  391. diffusers/schedulers/scheduling_heun_discrete.py +174 -46
  392. diffusers/schedulers/scheduling_ipndm.py +9 -9
  393. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +138 -29
  394. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +132 -26
  395. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  396. diffusers/schedulers/scheduling_lcm.py +23 -29
  397. diffusers/schedulers/scheduling_lms_discrete.py +105 -28
  398. diffusers/schedulers/scheduling_pndm.py +20 -20
  399. diffusers/schedulers/scheduling_repaint.py +21 -21
  400. diffusers/schedulers/scheduling_sasolver.py +157 -60
  401. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  402. diffusers/schedulers/scheduling_tcd.py +41 -36
  403. diffusers/schedulers/scheduling_unclip.py +19 -16
  404. diffusers/schedulers/scheduling_unipc_multistep.py +243 -47
  405. diffusers/schedulers/scheduling_utils.py +12 -5
  406. diffusers/schedulers/scheduling_utils_flax.py +1 -3
  407. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  408. diffusers/training_utils.py +214 -30
  409. diffusers/utils/__init__.py +17 -1
  410. diffusers/utils/constants.py +3 -0
  411. diffusers/utils/doc_utils.py +1 -0
  412. diffusers/utils/dummy_pt_objects.py +592 -7
  413. diffusers/utils/dummy_torch_and_torchsde_objects.py +15 -0
  414. diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py +47 -0
  415. diffusers/utils/dummy_torch_and_transformers_objects.py +1001 -71
  416. diffusers/utils/dynamic_modules_utils.py +34 -29
  417. diffusers/utils/export_utils.py +50 -6
  418. diffusers/utils/hub_utils.py +131 -17
  419. diffusers/utils/import_utils.py +210 -8
  420. diffusers/utils/loading_utils.py +118 -5
  421. diffusers/utils/logging.py +4 -2
  422. diffusers/utils/peft_utils.py +37 -7
  423. diffusers/utils/state_dict_utils.py +13 -2
  424. diffusers/utils/testing_utils.py +193 -11
  425. diffusers/utils/torch_utils.py +4 -0
  426. diffusers/video_processor.py +113 -0
  427. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/METADATA +82 -91
  428. diffusers-0.32.2.dist-info/RECORD +550 -0
  429. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/WHEEL +1 -1
  430. diffusers/loaders/autoencoder.py +0 -146
  431. diffusers/loaders/controlnet.py +0 -136
  432. diffusers/loaders/lora.py +0 -1349
  433. diffusers/models/prior_transformer.py +0 -12
  434. diffusers/models/t5_film_transformer.py +0 -70
  435. diffusers/models/transformer_2d.py +0 -25
  436. diffusers/models/transformer_temporal.py +0 -34
  437. diffusers/models/unet_1d.py +0 -26
  438. diffusers/models/unet_1d_blocks.py +0 -203
  439. diffusers/models/unet_2d.py +0 -27
  440. diffusers/models/unet_2d_blocks.py +0 -375
  441. diffusers/models/unet_2d_condition.py +0 -25
  442. diffusers-0.27.1.dist-info/RECORD +0 -399
  443. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/LICENSE +0 -0
  444. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/entry_points.txt +0 -0
  445. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/top_level.txt +0 -0
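
Two changes recur throughout the hunks below. First, the deprecated `LoraLoaderMixin` is replaced by `StableDiffusionLoraLoaderMixin`, reflecting the move of LoRA handling into the new `diffusers/loaders/lora_pipeline.py` (item 11 above). Second, `torch.FloatTensor` annotations are widened to `torch.Tensor`. The user-facing LoRA API is unchanged by the rename; a minimal sketch against 0.32.2, where the checkpoint and LoRA repo ids are placeholders rather than real Hub repos:

```python
import torch
from diffusers import StableDiffusionPipeline

# Placeholder repo ids; substitute checkpoints you actually have access to.
pipe = StableDiffusionPipeline.from_pretrained(
    "some-org/some-sd15-checkpoint", torch_dtype=torch.float16
).to("cuda")

# load_lora_weights / save_lora_weights are now provided by
# StableDiffusionLoraLoaderMixin, which SD-family pipelines inherit.
pipe.load_lora_weights("some-org/some-lora")
image = pipe("a cat in a teacup").images[0]
```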
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
@@ -20,7 +20,7 @@ from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokeniz
 from transformers.models.clip.modeling_clip import CLIPTextModelOutput
 
 from ...image_processor import VaeImageProcessor
-from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, PriorTransformer, UNet2DConditionModel
 from ...models.embeddings import get_timestep_embedding
 from ...models.lora import adjust_lora_scale_text_encoder
@@ -58,7 +58,9 @@ EXAMPLE_DOC_STRING = """
 """
 
 
-class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableUnCLIPPipeline(
+    DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
+):
     """
     Pipeline for text-to-image generation using stable unCLIP.
 
@@ -67,8 +69,8 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
 
     The pipeline also inherits the following loading methods:
         - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
-        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
-        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
 
     Args:
         prior_tokenizer ([`CLIPTokenizer`]):
@@ -76,7 +78,7 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
         prior_text_encoder ([`CLIPTextModelWithProjection`]):
            Frozen [`CLIPTextModelWithProjection`] text-encoder.
         prior ([`PriorTransformer`]):
-            The canonincal unCLIP prior to approximate the image embedding from the text embedding.
+            The canonical unCLIP prior to approximate the image embedding from the text embedding.
         prior_scheduler ([`KarrasDiffusionSchedulers`]):
             Scheduler used in the prior denoising process.
         image_normalizer ([`StableUnCLIPImageNormalizer`]):
@@ -257,8 +259,8 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         **kwargs,
     ):
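
The `torch.FloatTensor` → `torch.Tensor` substitutions in this and the following hunks are more than cosmetic: `torch.FloatTensor` is the legacy alias for 32-bit CPU tensors, while these arguments routinely carry fp16 CUDA tensors at inference time. A small sketch of the mismatch the old annotations implied:

```python
import torch

# An fp16 embedding is a torch.Tensor but not a torch.FloatTensor,
# so the old annotation misdescribed common inputs.
embeds = torch.randn(1, 77, 768, dtype=torch.float16)
print(isinstance(embeds, torch.Tensor))       # True
print(isinstance(embeds, torch.FloatTensor))  # False
```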
@@ -290,8 +292,8 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -311,10 +313,10 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -326,7 +328,7 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
         """
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
-        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
@@ -458,9 +460,10 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
@@ -588,7 +591,7 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
         self,
         image_embeds: torch.Tensor,
         noise_level: int,
-        noise: Optional[torch.FloatTensor] = None,
+        noise: Optional[torch.Tensor] = None,
         generator: Optional[torch.Generator] = None,
     ):
         """
@@ -644,19 +647,19 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         noise_level: int = 0,
         # prior args
         prior_num_inference_steps: int = 25,
         prior_guidance_scale: float = 4.0,
-        prior_latents: Optional[torch.FloatTensor] = None,
+        prior_latents: Optional[torch.Tensor] = None,
         clip_skip: Optional[int] = None,
     ):
         """
@@ -686,14 +689,14 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
@@ -702,7 +705,7 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
                 Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.
@@ -718,7 +721,7 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
             prior_guidance_scale (`float`, *optional*, defaults to 4.0):
                 A higher guidance scale value encourages the model to generate images closely linked to the text
                 `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
-            prior_latents (`torch.FloatTensor`, *optional*):
+            prior_latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 embedding generation in the prior denoising process. Can be used to tweak the same generation with
                 different prompts. If not provided, a latents tensor is generated by sampling using the supplied random
@@ -876,7 +879,12 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
 
         # 11. Prepare latent variables
         num_channels_latents = self.unet.config.in_channels
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         latents = self.prepare_latents(
             shape=shape,
             dtype=prompt_embeds.dtype,
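
The expanded `shape` tuple above also wraps `height` and `width` in `int(...)`. Floor division of a float by an int yields a float in Python, and `torch.randn` rejects float dimensions, so the cast guards against callers passing float sizes (e.g. from arithmetic like `512 * 1.5`). A quick illustration, assuming the usual VAE scale factor of 8:

```python
import torch

vae_scale_factor = 8
height = 512 * 1.5  # 768.0, a float

print(height // vae_scale_factor)       # 96.0 -- torch.randn would raise TypeError
print(int(height) // vae_scale_factor)  # 96

latents = torch.randn(1, 4, int(height) // vae_scale_factor, 96)
print(latents.shape)  # torch.Size([1, 4, 96, 96])
```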
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
@@ -20,7 +20,7 @@ import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 
 from ...image_processor import VaeImageProcessor
-from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.embeddings import get_timestep_embedding
 from ...models.lora import adjust_lora_scale_text_encoder
@@ -51,8 +51,8 @@ EXAMPLE_DOC_STRING = """
         >>> from diffusers import StableUnCLIPImg2ImgPipeline
 
         >>> pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
-        ...     "fusing/stable-unclip-2-1-l-img2img", torch_dtype=torch.float16
-        ... )  # TODO update model path
+        ...     "stabilityai/stable-diffusion-2-1-unclip-small", torch_dtype=torch.float16
+        ... )
         >>> pipe = pipe.to("cuda")
 
         >>> url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"
@@ -63,14 +63,14 @@ EXAMPLE_DOC_STRING = """
 
         >>> prompt = "A fantasy landscape, trending on artstation"
 
-        >>> images = pipe(prompt, init_image).images
+        >>> images = pipe(init_image, prompt).images
         >>> images[0].save("fantasy_landscape.png")
         ```
 """
 
 
 class StableUnCLIPImg2ImgPipeline(
-    DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+    DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
 ):
     """
     Pipeline for text-guided image-to-image generation using stable unCLIP.
@@ -80,8 +80,8 @@ class StableUnCLIPImg2ImgPipeline(
 
     The pipeline also inherits the following loading methods:
         - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
-        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
-        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
 
     Args:
         feature_extractor ([`CLIPImageProcessor`]):
@@ -166,8 +166,8 @@ class StableUnCLIPImg2ImgPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         **kwargs,
     ):
@@ -254,8 +254,8 @@ class StableUnCLIPImg2ImgPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -275,10 +275,10 @@ class StableUnCLIPImg2ImgPipeline(
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -290,7 +290,7 @@ class StableUnCLIPImg2ImgPipeline(
         """
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
-        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
@@ -422,9 +422,10 @@ class StableUnCLIPImg2ImgPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
@@ -537,13 +538,18 @@ class StableUnCLIPImg2ImgPipeline(
             and not isinstance(image, list)
         ):
             raise ValueError(
-                "`image` has to be of type `torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
+                "`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
                 f" {type(image)}"
             )
 
    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
    def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -564,7 +570,7 @@ class StableUnCLIPImg2ImgPipeline(
         self,
         image_embeds: torch.Tensor,
         noise_level: int,
-        noise: Optional[torch.FloatTensor] = None,
+        noise: Optional[torch.Tensor] = None,
         generator: Optional[torch.Generator] = None,
     ):
         """
@@ -610,7 +616,7 @@ class StableUnCLIPImg2ImgPipeline(
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
         self,
-        image: Union[torch.FloatTensor, PIL.Image.Image] = None,
+        image: Union[torch.Tensor, PIL.Image.Image] = None,
         prompt: Union[str, List[str]] = None,
         height: Optional[int] = None,
         width: Optional[int] = None,
@@ -620,16 +626,16 @@ class StableUnCLIPImg2ImgPipeline(
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         noise_level: int = 0,
-        image_embeds: Optional[torch.FloatTensor] = None,
+        image_embeds: Optional[torch.Tensor] = None,
         clip_skip: Optional[int] = None,
     ):
         r"""
@@ -639,7 +645,7 @@ class StableUnCLIPImg2ImgPipeline(
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide the image generation. If not defined, either `prompt_embeds` will be
                 used or prompt is initialized to `""`.
-            image (`torch.FloatTensor` or `PIL.Image.Image`):
+            image (`torch.Tensor` or `PIL.Image.Image`):
                 `Image` or tensor representing an image batch. The image is encoded to its CLIP embedding which the
                 `unet` is conditioned on. The image is _not_ encoded by the `vae` and then used as the latents in the
                 denoising process like it is in the standard Stable Diffusion text-guided image variation process.
@@ -664,14 +670,14 @@ class StableUnCLIPImg2ImgPipeline(
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
@@ -680,7 +686,7 @@ class StableUnCLIPImg2ImgPipeline(
                 Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.
@@ -690,7 +696,7 @@ class StableUnCLIPImg2ImgPipeline(
             noise_level (`int`, *optional*, defaults to `0`):
                 The amount of noise to add to the image embeddings. A higher `noise_level` increases the variance in
                 the final un-noised images. See [`StableUnCLIPPipeline.noise_image_embeddings`] for more details.
-            image_embeds (`torch.FloatTensor`, *optional*):
+            image_embeds (`torch.Tensor`, *optional*):
                 Pre-generated CLIP embeddings to condition the `unet` on. These latents are not used in the denoising
                 process. If you want to provide pre-generated latents, pass them to `__call__` as `latents`.
             clip_skip (`int`, *optional*):
@@ -781,16 +787,17 @@ class StableUnCLIPImg2ImgPipeline(
 
         # 6. Prepare latent variables
         num_channels_latents = self.unet.config.in_channels
-        latents = self.prepare_latents(
-            batch_size=batch_size,
-            num_channels_latents=num_channels_latents,
-            height=height,
-            width=width,
-            dtype=prompt_embeds.dtype,
-            device=device,
-            generator=generator,
-            latents=latents,
-        )
+        if latents is None:
+            latents = self.prepare_latents(
+                batch_size=batch_size,
+                num_channels_latents=num_channels_latents,
+                height=height,
+                width=width,
+                dtype=prompt_embeds.dtype,
+                device=device,
+                generator=generator,
+                latents=latents,
+            )
 
         # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
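
Note the behavioral change in the hunk above: `prepare_latents` is now only called when `latents is None`, so user-supplied latents reach the denoising loop exactly as passed in. A sketch of supplying your own latents for reproducibility; the shape assumes 4 latent channels and a VAE scale factor of 8 (96 = 768 // 8), and the caller is responsible for matching the pipeline's device and dtype:

```python
import torch

# Latents passed to __call__ are used as-is under the new guard, so they must
# already have the latent shape (batch, channels, h // 8, w // 8) and match
# the pipeline's device/dtype.
generator = torch.Generator("cpu").manual_seed(0)
latents = torch.randn((1, 4, 96, 96), generator=generator, dtype=torch.float16)

# images = pipe(init_image, prompt, latents=latents.to("cuda")).images
```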
diffusers/pipelines/stable_diffusion/safety_checker.py
@@ -31,6 +31,7 @@ def cosine_distance(image_embeds, text_embeds):
 
 class StableDiffusionSafetyChecker(PreTrainedModel):
     config_class = CLIPConfig
+    main_input_name = "clip_input"
 
     _no_split_modules = ["CLIPEncoderLayer"]
 
@@ -99,7 +100,7 @@ class StableDiffusionSafetyChecker(PreTrainedModel):
         return images, has_nsfw_concepts
 
     @torch.no_grad()
-    def forward_onnx(self, clip_input: torch.FloatTensor, images: torch.FloatTensor):
+    def forward_onnx(self, clip_input: torch.Tensor, images: torch.Tensor):
         pooled_output = self.vision_model(clip_input)[1]  # pooled_output
         image_embeds = self.visual_projection(pooled_output)
 
diffusers/pipelines/stable_diffusion_3/__init__.py
@@ -0,0 +1,54 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_flax_available,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_additional_imports = {}
+_import_structure = {"pipeline_output": ["StableDiffusion3PipelineOutput"]}
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_stable_diffusion_3"] = ["StableDiffusion3Pipeline"]
+    _import_structure["pipeline_stable_diffusion_3_img2img"] = ["StableDiffusion3Img2ImgPipeline"]
+    _import_structure["pipeline_stable_diffusion_3_inpaint"] = ["StableDiffusion3InpaintPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *  # noqa F403
+    else:
+        from .pipeline_stable_diffusion_3 import StableDiffusion3Pipeline
+        from .pipeline_stable_diffusion_3_img2img import StableDiffusion3Img2ImgPipeline
+        from .pipeline_stable_diffusion_3_inpaint import StableDiffusion3InpaintPipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
+    for name, value in _additional_imports.items():
+        setattr(sys.modules[__name__], name, value)
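
This new `__init__.py` follows the `_LazyModule` pattern used throughout diffusers: pipeline classes are declared in `_import_structure` and only imported on first attribute access, with dummy placeholder objects wired in when `torch` or `transformers` is unavailable. The laziness is invisible to callers; a sketch of the resulting import path (using the gated SD3 medium checkpoint, which assumes you have Hub access to it):

```python
import torch
from diffusers import StableDiffusion3Pipeline  # resolved lazily via _LazyModule

# Assumes access to the gated checkpoint on the Hugging Face Hub.
pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16
).to("cuda")

image = pipe("a photo of an astronaut riding a horse on mars").images[0]
image.save("astronaut.png")
```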
diffusers/pipelines/stable_diffusion_3/pipeline_output.py
@@ -0,0 +1,21 @@
+from dataclasses import dataclass
+from typing import List, Union
+
+import numpy as np
+import PIL.Image
+
+from ...utils import BaseOutput
+
+
+@dataclass
+class StableDiffusion3PipelineOutput(BaseOutput):
+    """
+    Output class for Stable Diffusion pipelines.
+
+    Args:
+        images (`List[PIL.Image.Image]` or `np.ndarray`)
+            List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
+            num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
+    """
+
+    images: Union[List[PIL.Image.Image], np.ndarray]
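
`StableDiffusion3PipelineOutput` is what the three SD3 pipelines return when `return_dict=True`; as a `BaseOutput` subclass it supports both attribute access and tuple-style indexing. A tiny sketch with a dummy array standing in for a decoded batch:

```python
import numpy as np

from diffusers.pipelines.stable_diffusion_3.pipeline_output import (
    StableDiffusion3PipelineOutput,
)

# Wrap a dummy (batch, height, width, channels) array, as the pipelines do
# for output_type="np".
out = StableDiffusion3PipelineOutput(images=np.zeros((1, 64, 64, 3), dtype=np.float32))
print(out.images.shape)  # (1, 64, 64, 3)
print(out[0].shape)      # BaseOutput also allows tuple-style access
```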