diffusers 0.27.1__py3-none-any.whl → 0.32.2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (445) hide show
  1. diffusers/__init__.py +233 -6
  2. diffusers/callbacks.py +209 -0
  3. diffusers/commands/env.py +102 -6
  4. diffusers/configuration_utils.py +45 -16
  5. diffusers/dependency_versions_table.py +4 -3
  6. diffusers/image_processor.py +434 -110
  7. diffusers/loaders/__init__.py +42 -9
  8. diffusers/loaders/ip_adapter.py +626 -36
  9. diffusers/loaders/lora_base.py +900 -0
  10. diffusers/loaders/lora_conversion_utils.py +991 -125
  11. diffusers/loaders/lora_pipeline.py +3812 -0
  12. diffusers/loaders/peft.py +571 -7
  13. diffusers/loaders/single_file.py +405 -173
  14. diffusers/loaders/single_file_model.py +385 -0
  15. diffusers/loaders/single_file_utils.py +1783 -713
  16. diffusers/loaders/textual_inversion.py +41 -23
  17. diffusers/loaders/transformer_flux.py +181 -0
  18. diffusers/loaders/transformer_sd3.py +89 -0
  19. diffusers/loaders/unet.py +464 -540
  20. diffusers/loaders/unet_loader_utils.py +163 -0
  21. diffusers/models/__init__.py +76 -7
  22. diffusers/models/activations.py +65 -10
  23. diffusers/models/adapter.py +53 -53
  24. diffusers/models/attention.py +605 -18
  25. diffusers/models/attention_flax.py +1 -1
  26. diffusers/models/attention_processor.py +4304 -687
  27. diffusers/models/autoencoders/__init__.py +8 -0
  28. diffusers/models/autoencoders/autoencoder_asym_kl.py +15 -17
  29. diffusers/models/autoencoders/autoencoder_dc.py +620 -0
  30. diffusers/models/autoencoders/autoencoder_kl.py +110 -28
  31. diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
  32. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +1482 -0
  33. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
  34. diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
  35. diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
  36. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +19 -24
  37. diffusers/models/autoencoders/autoencoder_oobleck.py +464 -0
  38. diffusers/models/autoencoders/autoencoder_tiny.py +21 -18
  39. diffusers/models/autoencoders/consistency_decoder_vae.py +45 -20
  40. diffusers/models/autoencoders/vae.py +41 -29
  41. diffusers/models/autoencoders/vq_model.py +182 -0
  42. diffusers/models/controlnet.py +47 -800
  43. diffusers/models/controlnet_flux.py +70 -0
  44. diffusers/models/controlnet_sd3.py +68 -0
  45. diffusers/models/controlnet_sparsectrl.py +116 -0
  46. diffusers/models/controlnets/__init__.py +23 -0
  47. diffusers/models/controlnets/controlnet.py +872 -0
  48. diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +9 -9
  49. diffusers/models/controlnets/controlnet_flux.py +536 -0
  50. diffusers/models/controlnets/controlnet_hunyuan.py +401 -0
  51. diffusers/models/controlnets/controlnet_sd3.py +489 -0
  52. diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
  53. diffusers/models/controlnets/controlnet_union.py +832 -0
  54. diffusers/models/controlnets/controlnet_xs.py +1946 -0
  55. diffusers/models/controlnets/multicontrolnet.py +183 -0
  56. diffusers/models/downsampling.py +85 -18
  57. diffusers/models/embeddings.py +1856 -158
  58. diffusers/models/embeddings_flax.py +23 -9
  59. diffusers/models/model_loading_utils.py +480 -0
  60. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  61. diffusers/models/modeling_flax_utils.py +2 -7
  62. diffusers/models/modeling_outputs.py +14 -0
  63. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  64. diffusers/models/modeling_utils.py +611 -146
  65. diffusers/models/normalization.py +361 -20
  66. diffusers/models/resnet.py +18 -23
  67. diffusers/models/transformers/__init__.py +16 -0
  68. diffusers/models/transformers/auraflow_transformer_2d.py +544 -0
  69. diffusers/models/transformers/cogvideox_transformer_3d.py +542 -0
  70. diffusers/models/transformers/dit_transformer_2d.py +240 -0
  71. diffusers/models/transformers/dual_transformer_2d.py +9 -8
  72. diffusers/models/transformers/hunyuan_transformer_2d.py +578 -0
  73. diffusers/models/transformers/latte_transformer_3d.py +327 -0
  74. diffusers/models/transformers/lumina_nextdit2d.py +340 -0
  75. diffusers/models/transformers/pixart_transformer_2d.py +445 -0
  76. diffusers/models/transformers/prior_transformer.py +13 -13
  77. diffusers/models/transformers/sana_transformer.py +488 -0
  78. diffusers/models/transformers/stable_audio_transformer.py +458 -0
  79. diffusers/models/transformers/t5_film_transformer.py +17 -19
  80. diffusers/models/transformers/transformer_2d.py +297 -187
  81. diffusers/models/transformers/transformer_allegro.py +422 -0
  82. diffusers/models/transformers/transformer_cogview3plus.py +386 -0
  83. diffusers/models/transformers/transformer_flux.py +593 -0
  84. diffusers/models/transformers/transformer_hunyuan_video.py +791 -0
  85. diffusers/models/transformers/transformer_ltx.py +469 -0
  86. diffusers/models/transformers/transformer_mochi.py +499 -0
  87. diffusers/models/transformers/transformer_sd3.py +461 -0
  88. diffusers/models/transformers/transformer_temporal.py +21 -19
  89. diffusers/models/unets/unet_1d.py +8 -8
  90. diffusers/models/unets/unet_1d_blocks.py +31 -31
  91. diffusers/models/unets/unet_2d.py +17 -10
  92. diffusers/models/unets/unet_2d_blocks.py +225 -149
  93. diffusers/models/unets/unet_2d_condition.py +41 -40
  94. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  95. diffusers/models/unets/unet_3d_blocks.py +192 -1057
  96. diffusers/models/unets/unet_3d_condition.py +22 -27
  97. diffusers/models/unets/unet_i2vgen_xl.py +22 -18
  98. diffusers/models/unets/unet_kandinsky3.py +2 -2
  99. diffusers/models/unets/unet_motion_model.py +1413 -89
  100. diffusers/models/unets/unet_spatio_temporal_condition.py +40 -16
  101. diffusers/models/unets/unet_stable_cascade.py +19 -18
  102. diffusers/models/unets/uvit_2d.py +2 -2
  103. diffusers/models/upsampling.py +95 -26
  104. diffusers/models/vq_model.py +12 -164
  105. diffusers/optimization.py +1 -1
  106. diffusers/pipelines/__init__.py +202 -3
  107. diffusers/pipelines/allegro/__init__.py +48 -0
  108. diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
  109. diffusers/pipelines/allegro/pipeline_output.py +23 -0
  110. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  111. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  112. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  113. diffusers/pipelines/animatediff/__init__.py +8 -0
  114. diffusers/pipelines/animatediff/pipeline_animatediff.py +122 -109
  115. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +1106 -0
  116. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1288 -0
  117. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1010 -0
  118. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +236 -180
  119. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
  120. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  121. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  122. diffusers/pipelines/audioldm2/modeling_audioldm2.py +58 -39
  123. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +121 -36
  124. diffusers/pipelines/aura_flow/__init__.py +48 -0
  125. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +584 -0
  126. diffusers/pipelines/auto_pipeline.py +196 -28
  127. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  128. diffusers/pipelines/blip_diffusion/modeling_blip2.py +6 -6
  129. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  130. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  131. diffusers/pipelines/cogvideo/__init__.py +54 -0
  132. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +772 -0
  133. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +825 -0
  134. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +885 -0
  135. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +851 -0
  136. diffusers/pipelines/cogvideo/pipeline_output.py +20 -0
  137. diffusers/pipelines/cogview3/__init__.py +47 -0
  138. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
  139. diffusers/pipelines/cogview3/pipeline_output.py +21 -0
  140. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +6 -6
  141. diffusers/pipelines/controlnet/__init__.py +86 -80
  142. diffusers/pipelines/controlnet/multicontrolnet.py +7 -182
  143. diffusers/pipelines/controlnet/pipeline_controlnet.py +134 -87
  144. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  145. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +93 -77
  146. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +88 -197
  147. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +136 -90
  148. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +176 -80
  149. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +125 -89
  150. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
  151. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
  152. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
  153. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
  154. diffusers/pipelines/controlnet_hunyuandit/__init__.py +48 -0
  155. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +1060 -0
  156. diffusers/pipelines/controlnet_sd3/__init__.py +57 -0
  157. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +1133 -0
  158. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
  159. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  160. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +916 -0
  161. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1111 -0
  162. diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
  163. diffusers/pipelines/deepfloyd_if/pipeline_if.py +16 -30
  164. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +20 -35
  165. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +23 -41
  166. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +22 -38
  167. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +25 -41
  168. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +19 -34
  169. diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
  170. diffusers/pipelines/deepfloyd_if/watermark.py +1 -1
  171. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  172. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +70 -30
  173. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +48 -25
  174. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  175. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  176. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +21 -20
  177. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +27 -29
  178. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +33 -27
  179. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +33 -23
  180. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +36 -30
  181. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +102 -69
  182. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  183. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  184. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  185. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  186. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  187. diffusers/pipelines/dit/pipeline_dit.py +7 -4
  188. diffusers/pipelines/flux/__init__.py +69 -0
  189. diffusers/pipelines/flux/modeling_flux.py +47 -0
  190. diffusers/pipelines/flux/pipeline_flux.py +957 -0
  191. diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
  192. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
  193. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
  194. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1006 -0
  195. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +998 -0
  196. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1204 -0
  197. diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
  198. diffusers/pipelines/flux/pipeline_flux_img2img.py +856 -0
  199. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1022 -0
  200. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
  201. diffusers/pipelines/flux/pipeline_output.py +37 -0
  202. diffusers/pipelines/free_init_utils.py +41 -38
  203. diffusers/pipelines/free_noise_utils.py +596 -0
  204. diffusers/pipelines/hunyuan_video/__init__.py +48 -0
  205. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
  206. diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
  207. diffusers/pipelines/hunyuandit/__init__.py +48 -0
  208. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +916 -0
  209. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  210. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  211. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +32 -29
  212. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  213. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  214. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  215. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  216. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +34 -31
  217. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  218. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  219. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  220. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  221. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  222. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  223. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  224. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +22 -35
  225. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +26 -37
  226. diffusers/pipelines/kolors/__init__.py +54 -0
  227. diffusers/pipelines/kolors/pipeline_kolors.py +1070 -0
  228. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +1250 -0
  229. diffusers/pipelines/kolors/pipeline_output.py +21 -0
  230. diffusers/pipelines/kolors/text_encoder.py +889 -0
  231. diffusers/pipelines/kolors/tokenizer.py +338 -0
  232. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +82 -62
  233. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +77 -60
  234. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +12 -12
  235. diffusers/pipelines/latte/__init__.py +48 -0
  236. diffusers/pipelines/latte/pipeline_latte.py +881 -0
  237. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +80 -74
  238. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +85 -76
  239. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  240. diffusers/pipelines/ltx/__init__.py +50 -0
  241. diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
  242. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
  243. diffusers/pipelines/ltx/pipeline_output.py +20 -0
  244. diffusers/pipelines/lumina/__init__.py +48 -0
  245. diffusers/pipelines/lumina/pipeline_lumina.py +890 -0
  246. diffusers/pipelines/marigold/__init__.py +50 -0
  247. diffusers/pipelines/marigold/marigold_image_processing.py +576 -0
  248. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  249. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  250. diffusers/pipelines/mochi/__init__.py +48 -0
  251. diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
  252. diffusers/pipelines/mochi/pipeline_output.py +20 -0
  253. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  254. diffusers/pipelines/pag/__init__.py +80 -0
  255. diffusers/pipelines/pag/pag_utils.py +243 -0
  256. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1328 -0
  257. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1543 -0
  258. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1610 -0
  259. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1683 -0
  260. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +969 -0
  261. diffusers/pipelines/pag/pipeline_pag_kolors.py +1136 -0
  262. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +865 -0
  263. diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
  264. diffusers/pipelines/pag/pipeline_pag_sd.py +1062 -0
  265. diffusers/pipelines/pag/pipeline_pag_sd_3.py +994 -0
  266. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
  267. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +866 -0
  268. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1094 -0
  269. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
  270. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +1345 -0
  271. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +1544 -0
  272. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +1776 -0
  273. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  274. diffusers/pipelines/pia/pipeline_pia.py +74 -164
  275. diffusers/pipelines/pipeline_flax_utils.py +5 -10
  276. diffusers/pipelines/pipeline_loading_utils.py +515 -53
  277. diffusers/pipelines/pipeline_utils.py +411 -222
  278. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  279. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +76 -93
  280. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +873 -0
  281. diffusers/pipelines/sana/__init__.py +47 -0
  282. diffusers/pipelines/sana/pipeline_output.py +21 -0
  283. diffusers/pipelines/sana/pipeline_sana.py +884 -0
  284. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +27 -23
  285. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  286. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  287. diffusers/pipelines/shap_e/renderer.py +1 -1
  288. diffusers/pipelines/stable_audio/__init__.py +50 -0
  289. diffusers/pipelines/stable_audio/modeling_stable_audio.py +158 -0
  290. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +756 -0
  291. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +71 -25
  292. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  293. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +35 -34
  294. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  295. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +20 -11
  296. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  297. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  298. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  299. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +145 -79
  300. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +43 -28
  301. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  302. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +100 -68
  303. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +109 -201
  304. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +131 -32
  305. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +247 -87
  306. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +30 -29
  307. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +35 -27
  308. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +49 -42
  309. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  310. diffusers/pipelines/stable_diffusion_3/__init__.py +54 -0
  311. diffusers/pipelines/stable_diffusion_3/pipeline_output.py +21 -0
  312. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +1140 -0
  313. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +1036 -0
  314. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +1250 -0
  315. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +29 -20
  316. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +59 -58
  317. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +31 -25
  318. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +38 -22
  319. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -24
  320. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -23
  321. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +107 -67
  322. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +316 -69
  323. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  324. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  325. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +98 -30
  326. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +121 -83
  327. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +161 -105
  328. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +142 -218
  329. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -29
  330. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  331. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  332. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +69 -39
  333. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +105 -74
  334. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  335. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +29 -49
  336. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +32 -93
  337. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +37 -25
  338. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +54 -40
  339. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  340. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  341. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  342. diffusers/pipelines/unidiffuser/modeling_uvit.py +12 -12
  343. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +29 -28
  344. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  345. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  346. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +6 -8
  347. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  348. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  349. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +15 -14
  350. diffusers/{models/dual_transformer_2d.py → quantizers/__init__.py} +2 -6
  351. diffusers/quantizers/auto.py +139 -0
  352. diffusers/quantizers/base.py +233 -0
  353. diffusers/quantizers/bitsandbytes/__init__.py +2 -0
  354. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +561 -0
  355. diffusers/quantizers/bitsandbytes/utils.py +306 -0
  356. diffusers/quantizers/gguf/__init__.py +1 -0
  357. diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
  358. diffusers/quantizers/gguf/utils.py +456 -0
  359. diffusers/quantizers/quantization_config.py +669 -0
  360. diffusers/quantizers/torchao/__init__.py +15 -0
  361. diffusers/quantizers/torchao/torchao_quantizer.py +292 -0
  362. diffusers/schedulers/__init__.py +12 -2
  363. diffusers/schedulers/deprecated/__init__.py +1 -1
  364. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  365. diffusers/schedulers/scheduling_amused.py +5 -5
  366. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  367. diffusers/schedulers/scheduling_consistency_models.py +23 -25
  368. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +572 -0
  369. diffusers/schedulers/scheduling_ddim.py +27 -26
  370. diffusers/schedulers/scheduling_ddim_cogvideox.py +452 -0
  371. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  372. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  373. diffusers/schedulers/scheduling_ddim_parallel.py +32 -31
  374. diffusers/schedulers/scheduling_ddpm.py +27 -30
  375. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  376. diffusers/schedulers/scheduling_ddpm_parallel.py +33 -36
  377. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  378. diffusers/schedulers/scheduling_deis_multistep.py +150 -50
  379. diffusers/schedulers/scheduling_dpm_cogvideox.py +489 -0
  380. diffusers/schedulers/scheduling_dpmsolver_multistep.py +221 -84
  381. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  382. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +158 -52
  383. diffusers/schedulers/scheduling_dpmsolver_sde.py +153 -34
  384. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +275 -86
  385. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +81 -57
  386. diffusers/schedulers/scheduling_edm_euler.py +62 -39
  387. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +30 -29
  388. diffusers/schedulers/scheduling_euler_discrete.py +255 -74
  389. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +458 -0
  390. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +320 -0
  391. diffusers/schedulers/scheduling_heun_discrete.py +174 -46
  392. diffusers/schedulers/scheduling_ipndm.py +9 -9
  393. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +138 -29
  394. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +132 -26
  395. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  396. diffusers/schedulers/scheduling_lcm.py +23 -29
  397. diffusers/schedulers/scheduling_lms_discrete.py +105 -28
  398. diffusers/schedulers/scheduling_pndm.py +20 -20
  399. diffusers/schedulers/scheduling_repaint.py +21 -21
  400. diffusers/schedulers/scheduling_sasolver.py +157 -60
  401. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  402. diffusers/schedulers/scheduling_tcd.py +41 -36
  403. diffusers/schedulers/scheduling_unclip.py +19 -16
  404. diffusers/schedulers/scheduling_unipc_multistep.py +243 -47
  405. diffusers/schedulers/scheduling_utils.py +12 -5
  406. diffusers/schedulers/scheduling_utils_flax.py +1 -3
  407. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  408. diffusers/training_utils.py +214 -30
  409. diffusers/utils/__init__.py +17 -1
  410. diffusers/utils/constants.py +3 -0
  411. diffusers/utils/doc_utils.py +1 -0
  412. diffusers/utils/dummy_pt_objects.py +592 -7
  413. diffusers/utils/dummy_torch_and_torchsde_objects.py +15 -0
  414. diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py +47 -0
  415. diffusers/utils/dummy_torch_and_transformers_objects.py +1001 -71
  416. diffusers/utils/dynamic_modules_utils.py +34 -29
  417. diffusers/utils/export_utils.py +50 -6
  418. diffusers/utils/hub_utils.py +131 -17
  419. diffusers/utils/import_utils.py +210 -8
  420. diffusers/utils/loading_utils.py +118 -5
  421. diffusers/utils/logging.py +4 -2
  422. diffusers/utils/peft_utils.py +37 -7
  423. diffusers/utils/state_dict_utils.py +13 -2
  424. diffusers/utils/testing_utils.py +193 -11
  425. diffusers/utils/torch_utils.py +4 -0
  426. diffusers/video_processor.py +113 -0
  427. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/METADATA +82 -91
  428. diffusers-0.32.2.dist-info/RECORD +550 -0
  429. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/WHEEL +1 -1
  430. diffusers/loaders/autoencoder.py +0 -146
  431. diffusers/loaders/controlnet.py +0 -136
  432. diffusers/loaders/lora.py +0 -1349
  433. diffusers/models/prior_transformer.py +0 -12
  434. diffusers/models/t5_film_transformer.py +0 -70
  435. diffusers/models/transformer_2d.py +0 -25
  436. diffusers/models/transformer_temporal.py +0 -34
  437. diffusers/models/unet_1d.py +0 -26
  438. diffusers/models/unet_1d_blocks.py +0 -203
  439. diffusers/models/unet_2d.py +0 -27
  440. diffusers/models/unet_2d_blocks.py +0 -375
  441. diffusers/models/unet_2d_condition.py +0 -25
  442. diffusers-0.27.1.dist-info/RECORD +0 -399
  443. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/LICENSE +0 -0
  444. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/entry_points.txt +0 -0
  445. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/top_level.txt +0 -0
@@ -22,7 +22,7 @@ from torch.nn import functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
 from ...image_processor import VaeImageProcessor
-from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.attention_processor import Attention
 from ...models.lora import adjust_lora_scale_text_encoder
@@ -254,8 +254,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         **kwargs,
     ):
@@ -287,8 +287,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -308,10 +308,10 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
308
308
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
309
309
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
310
310
  less than `1`).
311
- prompt_embeds (`torch.FloatTensor`, *optional*):
311
+ prompt_embeds (`torch.Tensor`, *optional*):
312
312
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
313
313
  provided, text embeddings will be generated from `prompt` input argument.
314
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
314
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
315
315
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
316
316
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
317
317
  argument.
@@ -323,7 +323,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
323
323
  """
324
324
  # set lora scale so that monkey patched LoRA
325
325
  # function of text encoder can correctly access it
326
- if lora_scale is not None and isinstance(self, LoraLoaderMixin):
326
+ if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
327
327
  self._lora_scale = lora_scale
328
328
 
329
329
  # dynamically adjust the LoRA scale
@@ -455,9 +455,10 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
455
455
  negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
456
456
  negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
457
457
 
458
- if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
459
- # Retrieve the original scale by scaling back the LoRA layers
460
- unscale_lora_layers(self.text_encoder, lora_scale)
458
+ if self.text_encoder is not None:
459
+ if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
460
+ # Retrieve the original scale by scaling back the LoRA layers
461
+ unscale_lora_layers(self.text_encoder, lora_scale)
461
462
 
462
463
  return prompt_embeds, negative_prompt_embeds
463
464
 
@@ -581,7 +582,12 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
581
582
 
582
583
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
583
584
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
584
- shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
585
+ shape = (
586
+ batch_size,
587
+ num_channels_latents,
588
+ int(height) // self.vae_scale_factor,
589
+ int(width) // self.vae_scale_factor,
590
+ )
585
591
  if isinstance(generator, list) and len(generator) != batch_size:
586
592
  raise ValueError(
587
593
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -741,12 +747,12 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
741
747
  num_images_per_prompt: int = 1,
742
748
  eta: float = 0.0,
743
749
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
744
- latents: Optional[torch.FloatTensor] = None,
745
- prompt_embeds: Optional[torch.FloatTensor] = None,
746
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
750
+ latents: Optional[torch.Tensor] = None,
751
+ prompt_embeds: Optional[torch.Tensor] = None,
752
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
747
753
  output_type: Optional[str] = "pil",
748
754
  return_dict: bool = True,
749
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
755
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
750
756
  callback_steps: int = 1,
751
757
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
752
758
  max_iter_to_alter: int = 25,
@@ -784,14 +790,14 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
784
790
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
785
791
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
786
792
  generation deterministic.
787
- latents (`torch.FloatTensor`, *optional*):
793
+ latents (`torch.Tensor`, *optional*):
788
794
  Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
789
795
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
790
796
  tensor is generated by sampling using the supplied random `generator`.
791
- prompt_embeds (`torch.FloatTensor`, *optional*):
797
+ prompt_embeds (`torch.Tensor`, *optional*):
792
798
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
793
799
  provided, text embeddings are generated from the `prompt` input argument.
794
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
800
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
795
801
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
796
802
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
797
803
  output_type (`str`, *optional*, defaults to `"pil"`):
@@ -801,7 +807,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
801
807
  plain tuple.
802
808
  callback (`Callable`, *optional*):
803
809
  A function that calls every `callback_steps` steps during inference. The function is called with the
804
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
810
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
805
811
  callback_steps (`int`, *optional*, defaults to 1):
806
812
  The frequency at which the `callback` function is called. If not specified, the callback is called at
807
813
  every step.
@@ -902,6 +908,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
902
908
  if attn_res is None:
903
909
  attn_res = int(np.ceil(width / 32)), int(np.ceil(height / 32))
904
910
  self.attention_store = AttentionStore(attn_res)
911
+ original_attn_proc = self.unet.attn_processors
905
912
  self.register_attention_control()
906
913
 
907
914
  # default config for step size from original repo
@@ -1016,6 +1023,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
1016
1023
 
1017
1024
  image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
1018
1025
  self.maybe_free_model_hooks()
1026
+ # make sure to set the original attention processors back
1027
+ self.unet.set_attn_processor(original_attn_proc)
1019
1028
 
1020
1029
  if not return_dict:
1021
1030
  return (image, has_nsfw_concept)
@@ -24,7 +24,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
24
24
 
25
25
  from ...configuration_utils import FrozenDict
26
26
  from ...image_processor import VaeImageProcessor
27
- from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
27
+ from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
28
28
  from ...models import AutoencoderKL, UNet2DConditionModel
29
29
  from ...models.lora import adjust_lora_scale_text_encoder
30
30
  from ...schedulers import DDIMInverseScheduler, KarrasDiffusionSchedulers
@@ -53,7 +53,7 @@ class DiffEditInversionPipelineOutput(BaseOutput):
53
53
  Output class for Stable Diffusion pipelines.
54
54
 
55
55
  Args:
56
- latents (`torch.FloatTensor`)
56
+ latents (`torch.Tensor`)
57
57
  inverted latents tensor
58
58
  images (`List[PIL.Image.Image]` or `np.ndarray`)
59
59
  List of denoised PIL images of length `num_timesteps * batch_size` or numpy array of shape `(num_timesteps,
@@ -61,7 +61,7 @@ class DiffEditInversionPipelineOutput(BaseOutput):
61
61
  diffusion pipeline.
62
62
  """
63
63
 
64
- latents: torch.FloatTensor
64
+ latents: torch.Tensor
65
65
  images: Union[List[PIL.Image.Image], np.ndarray]
66
66
 
67
67
 
@@ -85,10 +85,9 @@ EXAMPLE_DOC_STRING = """
85
85
 
86
86
  >>> init_image = download_image(img_url).resize((768, 768))
87
87
 
88
- >>> pipe = StableDiffusionDiffEditPipeline.from_pretrained(
88
+ >>> pipeline = StableDiffusionDiffEditPipeline.from_pretrained(
89
89
  ... "stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16
90
90
  ... )
91
- >>> pipe = pipe.to("cuda")
92
91
 
93
92
  >>> pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
94
93
  >>> pipeline.inverse_scheduler = DDIMInverseScheduler.from_config(pipeline.scheduler.config)
@@ -97,9 +96,9 @@ EXAMPLE_DOC_STRING = """
97
96
  >>> mask_prompt = "A bowl of fruits"
98
97
  >>> prompt = "A bowl of pears"
99
98
 
100
- >>> mask_image = pipe.generate_mask(image=init_image, source_prompt=prompt, target_prompt=mask_prompt)
101
- >>> image_latents = pipe.invert(image=init_image, prompt=mask_prompt).latents
102
- >>> image = pipe(prompt=prompt, mask_image=mask_image, image_latents=image_latents).images[0]
99
+ >>> mask_image = pipeline.generate_mask(image=init_image, source_prompt=prompt, target_prompt=mask_prompt)
100
+ >>> image_latents = pipeline.invert(image=init_image, prompt=mask_prompt).latents
101
+ >>> image = pipeline(prompt=prompt, mask_image=mask_image, image_latents=image_latents).images[0]
103
102
  ```
104
103
  """
105
104
 
@@ -122,10 +121,9 @@ EXAMPLE_INVERT_DOC_STRING = """
122
121
 
123
122
  >>> init_image = download_image(img_url).resize((768, 768))
124
123
 
125
- >>> pipe = StableDiffusionDiffEditPipeline.from_pretrained(
124
+ >>> pipeline = StableDiffusionDiffEditPipeline.from_pretrained(
126
125
  ... "stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16
127
126
  ... )
128
- >>> pipe = pipe.to("cuda")
129
127
 
130
128
  >>> pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
131
129
  >>> pipeline.inverse_scheduler = DDIMInverseScheduler.from_config(pipeline.scheduler.config)
@@ -133,7 +131,7 @@ EXAMPLE_INVERT_DOC_STRING = """
133
131
 
134
132
  >>> prompt = "A bowl of fruits"
135
133
 
136
- >>> inverted_latents = pipe.invert(image=init_image, prompt=prompt).latents
134
+ >>> inverted_latents = pipeline.invert(image=init_image, prompt=prompt).latents
137
135
  ```
138
136
  """
139
137
 
@@ -185,7 +183,7 @@ def preprocess(image):
185
183
  def preprocess_mask(mask, batch_size: int = 1):
186
184
  if not isinstance(mask, torch.Tensor):
187
185
  # preprocess mask
188
- if isinstance(mask, PIL.Image.Image) or isinstance(mask, np.ndarray):
186
+ if isinstance(mask, (PIL.Image.Image, np.ndarray)):
189
187
  mask = [mask]
190
188
 
191
189
  if isinstance(mask, list):
@@ -236,7 +234,7 @@ def preprocess_mask(mask, batch_size: int = 1):
236
234
 
237
235
 
238
236
  class StableDiffusionDiffEditPipeline(
239
- DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
237
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
240
238
  ):
241
239
  r"""
242
240
  <Tip warning={true}>
@@ -252,8 +250,8 @@ class StableDiffusionDiffEditPipeline(
252
250
 
253
251
  The pipeline also inherits the following loading and saving methods:
254
252
  - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
255
- - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
256
- - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
253
+ - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
254
+ - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
257
255
 
258
256
  Args:
259
257
  vae ([`AutoencoderKL`]):
@@ -381,8 +379,8 @@ class StableDiffusionDiffEditPipeline(
381
379
  num_images_per_prompt,
382
380
  do_classifier_free_guidance,
383
381
  negative_prompt=None,
384
- prompt_embeds: Optional[torch.FloatTensor] = None,
385
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
382
+ prompt_embeds: Optional[torch.Tensor] = None,
383
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
386
384
  lora_scale: Optional[float] = None,
387
385
  **kwargs,
388
386
  ):
@@ -414,8 +412,8 @@ class StableDiffusionDiffEditPipeline(
414
412
  num_images_per_prompt,
415
413
  do_classifier_free_guidance,
416
414
  negative_prompt=None,
417
- prompt_embeds: Optional[torch.FloatTensor] = None,
418
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
415
+ prompt_embeds: Optional[torch.Tensor] = None,
416
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
419
417
  lora_scale: Optional[float] = None,
420
418
  clip_skip: Optional[int] = None,
421
419
  ):
@@ -435,10 +433,10 @@ class StableDiffusionDiffEditPipeline(
435
433
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
436
434
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
437
435
  less than `1`).
438
- prompt_embeds (`torch.FloatTensor`, *optional*):
436
+ prompt_embeds (`torch.Tensor`, *optional*):
439
437
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
440
438
  provided, text embeddings will be generated from `prompt` input argument.
441
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
439
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
442
440
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
443
441
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
444
442
  argument.
@@ -450,7 +448,7 @@ class StableDiffusionDiffEditPipeline(
450
448
  """
451
449
  # set lora scale so that monkey patched LoRA
452
450
  # function of text encoder can correctly access it
453
- if lora_scale is not None and isinstance(self, LoraLoaderMixin):
451
+ if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
454
452
  self._lora_scale = lora_scale
455
453
 
456
454
  # dynamically adjust the LoRA scale
@@ -582,9 +580,10 @@ class StableDiffusionDiffEditPipeline(
582
580
  negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
583
581
  negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
584
582
 
585
- if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
586
- # Retrieve the original scale by scaling back the LoRA layers
587
- unscale_lora_layers(self.text_encoder, lora_scale)
583
+ if self.text_encoder is not None:
584
+ if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
585
+ # Retrieve the original scale by scaling back the LoRA layers
586
+ unscale_lora_layers(self.text_encoder, lora_scale)
588
587
 
589
588
  return prompt_embeds, negative_prompt_embeds
590
589
 
@@ -716,15 +715,12 @@ class StableDiffusionDiffEditPipeline(
716
715
  f" `source_negative_prompt_embeds` {source_negative_prompt_embeds.shape}."
717
716
  )
718
717
 
719
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
720
718
  def get_timesteps(self, num_inference_steps, strength, device):
721
719
  # get the original timestep using init_timestep
722
720
  init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
723
721
 
724
722
  t_start = max(num_inference_steps - init_timestep, 0)
725
723
  timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
726
- if hasattr(self.scheduler, "set_begin_index"):
727
- self.scheduler.set_begin_index(t_start * self.scheduler.order)
728
724
 
729
725
  return timesteps, num_inference_steps - t_start
730
726
 
@@ -743,7 +739,12 @@ class StableDiffusionDiffEditPipeline(
743
739
 
744
740
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
745
741
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
746
- shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
742
+ shape = (
743
+ batch_size,
744
+ num_channels_latents,
745
+ int(height) // self.vae_scale_factor,
746
+ int(width) // self.vae_scale_factor,
747
+ )
747
748
  if isinstance(generator, list) and len(generator) != batch_size:
748
749
  raise ValueError(
749
750
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -829,15 +830,15 @@ class StableDiffusionDiffEditPipeline(
829
830
  @replace_example_docstring(EXAMPLE_DOC_STRING)
830
831
  def generate_mask(
831
832
  self,
832
- image: Union[torch.FloatTensor, PIL.Image.Image] = None,
833
+ image: Union[torch.Tensor, PIL.Image.Image] = None,
833
834
  target_prompt: Optional[Union[str, List[str]]] = None,
834
835
  target_negative_prompt: Optional[Union[str, List[str]]] = None,
835
- target_prompt_embeds: Optional[torch.FloatTensor] = None,
836
- target_negative_prompt_embeds: Optional[torch.FloatTensor] = None,
836
+ target_prompt_embeds: Optional[torch.Tensor] = None,
837
+ target_negative_prompt_embeds: Optional[torch.Tensor] = None,
837
838
  source_prompt: Optional[Union[str, List[str]]] = None,
838
839
  source_negative_prompt: Optional[Union[str, List[str]]] = None,
839
- source_prompt_embeds: Optional[torch.FloatTensor] = None,
840
- source_negative_prompt_embeds: Optional[torch.FloatTensor] = None,
840
+ source_prompt_embeds: Optional[torch.Tensor] = None,
841
+ source_negative_prompt_embeds: Optional[torch.Tensor] = None,
841
842
  num_maps_per_mask: Optional[int] = 10,
842
843
  mask_encode_strength: Optional[float] = 0.5,
843
844
  mask_thresholding_ratio: Optional[float] = 3.0,
@@ -859,10 +860,10 @@ class StableDiffusionDiffEditPipeline(
859
860
  target_negative_prompt (`str` or `List[str]`, *optional*):
860
861
  The prompt or prompts to guide what to not include in image generation. If not defined, you need to
861
862
  pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
862
- target_prompt_embeds (`torch.FloatTensor`, *optional*):
863
+ target_prompt_embeds (`torch.Tensor`, *optional*):
863
864
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
864
865
  provided, text embeddings are generated from the `prompt` input argument.
865
- target_negative_prompt_embeds (`torch.FloatTensor`, *optional*):
866
+ target_negative_prompt_embeds (`torch.Tensor`, *optional*):
866
867
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
867
868
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
868
869
  source_prompt (`str` or `List[str]`, *optional*):
@@ -871,11 +872,11 @@ class StableDiffusionDiffEditPipeline(
871
872
  source_negative_prompt (`str` or `List[str]`, *optional*):
872
873
  The prompt or prompts to guide semantic mask generation away from using DiffEdit. If not defined, you
873
874
  need to pass `source_negative_prompt_embeds` or `source_image` instead.
874
- source_prompt_embeds (`torch.FloatTensor`, *optional*):
875
+ source_prompt_embeds (`torch.Tensor`, *optional*):
875
876
  Pre-generated text embeddings to guide the semantic mask generation. Can be used to easily tweak text
876
877
  inputs (prompt weighting). If not provided, text embeddings are generated from `source_prompt` input
877
878
  argument.
878
- source_negative_prompt_embeds (`torch.FloatTensor`, *optional*):
879
+ source_negative_prompt_embeds (`torch.Tensor`, *optional*):
879
880
  Pre-generated text embeddings to negatively guide the semantic mask generation. Can be used to easily
880
881
  tweak text inputs (prompt weighting). If not provided, text embeddings are generated from
881
882
  `source_negative_prompt` input argument.
@@ -1049,18 +1050,18 @@ class StableDiffusionDiffEditPipeline(
1049
1050
  def invert(
1050
1051
  self,
1051
1052
  prompt: Optional[Union[str, List[str]]] = None,
1052
- image: Union[torch.FloatTensor, PIL.Image.Image] = None,
1053
+ image: Union[torch.Tensor, PIL.Image.Image] = None,
1053
1054
  num_inference_steps: int = 50,
1054
1055
  inpaint_strength: float = 0.8,
1055
1056
  guidance_scale: float = 7.5,
1056
1057
  negative_prompt: Optional[Union[str, List[str]]] = None,
1057
1058
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
1058
- prompt_embeds: Optional[torch.FloatTensor] = None,
1059
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
1059
+ prompt_embeds: Optional[torch.Tensor] = None,
1060
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
1060
1061
  decode_latents: bool = False,
1061
1062
  output_type: Optional[str] = "pil",
1062
1063
  return_dict: bool = True,
1063
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
1064
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
1064
1065
  callback_steps: Optional[int] = 1,
1065
1066
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
1066
1067
  lambda_auto_corr: float = 20.0,
@@ -1093,10 +1094,10 @@ class StableDiffusionDiffEditPipeline(
1093
1094
  generator (`torch.Generator`, *optional*):
1094
1095
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
1095
1096
  generation deterministic.
1096
- prompt_embeds (`torch.FloatTensor`, *optional*):
1097
+ prompt_embeds (`torch.Tensor`, *optional*):
1097
1098
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
1098
1099
  provided, text embeddings are generated from the `prompt` input argument.
1099
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
1100
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
1100
1101
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
1101
1102
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
1102
1103
  decode_latents (`bool`, *optional*, defaults to `False`):
@@ -1109,7 +1110,7 @@ class StableDiffusionDiffEditPipeline(
1109
1110
  plain tuple.
1110
1111
  callback (`Callable`, *optional*):
1111
1112
  A function that calls every `callback_steps` steps during inference. The function is called with the
1112
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
1113
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
1113
1114
  callback_steps (`int`, *optional*, defaults to 1):
1114
1115
  The frequency at which the `callback` function is called. If not specified, the callback is called at
1115
1116
  every step.
@@ -1287,8 +1288,8 @@ class StableDiffusionDiffEditPipeline(
1287
1288
  def __call__(
1288
1289
  self,
1289
1290
  prompt: Optional[Union[str, List[str]]] = None,
1290
- mask_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
1291
- image_latents: Union[torch.FloatTensor, PIL.Image.Image] = None,
1291
+ mask_image: Union[torch.Tensor, PIL.Image.Image] = None,
1292
+ image_latents: Union[torch.Tensor, PIL.Image.Image] = None,
1292
1293
  inpaint_strength: Optional[float] = 0.8,
1293
1294
  num_inference_steps: int = 50,
1294
1295
  guidance_scale: float = 7.5,
@@ -1296,15 +1297,15 @@ class StableDiffusionDiffEditPipeline(
1296
1297
  num_images_per_prompt: Optional[int] = 1,
1297
1298
  eta: float = 0.0,
1298
1299
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
1299
- latents: Optional[torch.FloatTensor] = None,
1300
- prompt_embeds: Optional[torch.FloatTensor] = None,
1301
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
1300
+ latents: Optional[torch.Tensor] = None,
1301
+ prompt_embeds: Optional[torch.Tensor] = None,
1302
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
1302
1303
  output_type: Optional[str] = "pil",
1303
1304
  return_dict: bool = True,
1304
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
1305
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
1305
1306
  callback_steps: int = 1,
1306
1307
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
1307
- clip_ckip: int = None,
1308
+ clip_skip: int = None,
1308
1309
  ):
1309
1310
  r"""
1310
1311
  The call function to the pipeline for generation.
@@ -1317,7 +1318,7 @@ class StableDiffusionDiffEditPipeline(
1317
1318
  repainted, while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a
1318
1319
  single channel (luminance) before use. If it's a tensor, it should contain one color channel (L)
1319
1320
  instead of 3, so the expected shape would be `(B, 1, H, W)`.
1320
- image_latents (`PIL.Image.Image` or `torch.FloatTensor`):
1321
+ image_latents (`PIL.Image.Image` or `torch.Tensor`):
1321
1322
  Partially noised image latents from the inversion process to be used as inputs for image generation.
1322
1323
  inpaint_strength (`float`, *optional*, defaults to 0.8):
1323
1324
  Indicates extent to inpaint the masked area. Must be between 0 and 1. When `inpaint_strength` is 1, the
@@ -1341,14 +1342,14 @@ class StableDiffusionDiffEditPipeline(
1341
1342
  generator (`torch.Generator`, *optional*):
1342
1343
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
1343
1344
  generation deterministic.
1344
- latents (`torch.FloatTensor`, *optional*):
1345
+ latents (`torch.Tensor`, *optional*):
1345
1346
  Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
1346
1347
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
1347
1348
  tensor is generated by sampling using the supplied random `generator`.
1348
- prompt_embeds (`torch.FloatTensor`, *optional*):
1349
+ prompt_embeds (`torch.Tensor`, *optional*):
1349
1350
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
1350
1351
  provided, text embeddings are generated from the `prompt` input argument.
1351
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
1352
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
1352
1353
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
1353
1354
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
1354
1355
  output_type (`str`, *optional*, defaults to `"pil"`):
@@ -1358,7 +1359,7 @@ class StableDiffusionDiffEditPipeline(
1358
1359
  plain tuple.
1359
1360
  callback (`Callable`, *optional*):
1360
1361
  A function that calls every `callback_steps` steps during inference. The function is called with the
1361
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
1362
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
1362
1363
  callback_steps (`int`, *optional*, defaults to 1):
1363
1364
  The frequency at which the `callback` function is called. If not specified, the callback is called at
1364
1365
  every step.
@@ -1426,7 +1427,7 @@ class StableDiffusionDiffEditPipeline(
1426
1427
  prompt_embeds=prompt_embeds,
1427
1428
  negative_prompt_embeds=negative_prompt_embeds,
1428
1429
  lora_scale=text_encoder_lora_scale,
1429
- clip_skip=clip_ckip,
1430
+ clip_skip=clip_skip,
1430
1431
  )
1431
1432
  # For classifier free guidance, we need to do two forward passes.
1432
1433
  # Here we concatenate the unconditional and text embeddings into a single batch