diffusers-0.27.0-py3-none-any.whl → diffusers-0.32.2-py3-none-any.whl

Files changed (445)
  1. diffusers/__init__.py +233 -6
  2. diffusers/callbacks.py +209 -0
  3. diffusers/commands/env.py +102 -6
  4. diffusers/configuration_utils.py +45 -16
  5. diffusers/dependency_versions_table.py +4 -3
  6. diffusers/image_processor.py +434 -110
  7. diffusers/loaders/__init__.py +42 -9
  8. diffusers/loaders/ip_adapter.py +626 -36
  9. diffusers/loaders/lora_base.py +900 -0
  10. diffusers/loaders/lora_conversion_utils.py +991 -125
  11. diffusers/loaders/lora_pipeline.py +3812 -0
  12. diffusers/loaders/peft.py +571 -7
  13. diffusers/loaders/single_file.py +405 -173
  14. diffusers/loaders/single_file_model.py +385 -0
  15. diffusers/loaders/single_file_utils.py +1783 -713
  16. diffusers/loaders/textual_inversion.py +41 -23
  17. diffusers/loaders/transformer_flux.py +181 -0
  18. diffusers/loaders/transformer_sd3.py +89 -0
  19. diffusers/loaders/unet.py +464 -540
  20. diffusers/loaders/unet_loader_utils.py +163 -0
  21. diffusers/models/__init__.py +76 -7
  22. diffusers/models/activations.py +65 -10
  23. diffusers/models/adapter.py +53 -53
  24. diffusers/models/attention.py +605 -18
  25. diffusers/models/attention_flax.py +1 -1
  26. diffusers/models/attention_processor.py +4304 -687
  27. diffusers/models/autoencoders/__init__.py +8 -0
  28. diffusers/models/autoencoders/autoencoder_asym_kl.py +15 -17
  29. diffusers/models/autoencoders/autoencoder_dc.py +620 -0
  30. diffusers/models/autoencoders/autoencoder_kl.py +110 -28
  31. diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
  32. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +1482 -0
  33. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
  34. diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
  35. diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
  36. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +19 -24
  37. diffusers/models/autoencoders/autoencoder_oobleck.py +464 -0
  38. diffusers/models/autoencoders/autoencoder_tiny.py +21 -18
  39. diffusers/models/autoencoders/consistency_decoder_vae.py +45 -20
  40. diffusers/models/autoencoders/vae.py +41 -29
  41. diffusers/models/autoencoders/vq_model.py +182 -0
  42. diffusers/models/controlnet.py +47 -800
  43. diffusers/models/controlnet_flux.py +70 -0
  44. diffusers/models/controlnet_sd3.py +68 -0
  45. diffusers/models/controlnet_sparsectrl.py +116 -0
  46. diffusers/models/controlnets/__init__.py +23 -0
  47. diffusers/models/controlnets/controlnet.py +872 -0
  48. diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +9 -9
  49. diffusers/models/controlnets/controlnet_flux.py +536 -0
  50. diffusers/models/controlnets/controlnet_hunyuan.py +401 -0
  51. diffusers/models/controlnets/controlnet_sd3.py +489 -0
  52. diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
  53. diffusers/models/controlnets/controlnet_union.py +832 -0
  54. diffusers/models/controlnets/controlnet_xs.py +1946 -0
  55. diffusers/models/controlnets/multicontrolnet.py +183 -0
  56. diffusers/models/downsampling.py +85 -18
  57. diffusers/models/embeddings.py +1856 -158
  58. diffusers/models/embeddings_flax.py +23 -9
  59. diffusers/models/model_loading_utils.py +480 -0
  60. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  61. diffusers/models/modeling_flax_utils.py +2 -7
  62. diffusers/models/modeling_outputs.py +14 -0
  63. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  64. diffusers/models/modeling_utils.py +611 -146
  65. diffusers/models/normalization.py +361 -20
  66. diffusers/models/resnet.py +18 -23
  67. diffusers/models/transformers/__init__.py +16 -0
  68. diffusers/models/transformers/auraflow_transformer_2d.py +544 -0
  69. diffusers/models/transformers/cogvideox_transformer_3d.py +542 -0
  70. diffusers/models/transformers/dit_transformer_2d.py +240 -0
  71. diffusers/models/transformers/dual_transformer_2d.py +9 -8
  72. diffusers/models/transformers/hunyuan_transformer_2d.py +578 -0
  73. diffusers/models/transformers/latte_transformer_3d.py +327 -0
  74. diffusers/models/transformers/lumina_nextdit2d.py +340 -0
  75. diffusers/models/transformers/pixart_transformer_2d.py +445 -0
  76. diffusers/models/transformers/prior_transformer.py +13 -13
  77. diffusers/models/transformers/sana_transformer.py +488 -0
  78. diffusers/models/transformers/stable_audio_transformer.py +458 -0
  79. diffusers/models/transformers/t5_film_transformer.py +17 -19
  80. diffusers/models/transformers/transformer_2d.py +297 -187
  81. diffusers/models/transformers/transformer_allegro.py +422 -0
  82. diffusers/models/transformers/transformer_cogview3plus.py +386 -0
  83. diffusers/models/transformers/transformer_flux.py +593 -0
  84. diffusers/models/transformers/transformer_hunyuan_video.py +791 -0
  85. diffusers/models/transformers/transformer_ltx.py +469 -0
  86. diffusers/models/transformers/transformer_mochi.py +499 -0
  87. diffusers/models/transformers/transformer_sd3.py +461 -0
  88. diffusers/models/transformers/transformer_temporal.py +21 -19
  89. diffusers/models/unets/unet_1d.py +8 -8
  90. diffusers/models/unets/unet_1d_blocks.py +31 -31
  91. diffusers/models/unets/unet_2d.py +17 -10
  92. diffusers/models/unets/unet_2d_blocks.py +225 -149
  93. diffusers/models/unets/unet_2d_condition.py +50 -53
  94. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  95. diffusers/models/unets/unet_3d_blocks.py +192 -1057
  96. diffusers/models/unets/unet_3d_condition.py +22 -27
  97. diffusers/models/unets/unet_i2vgen_xl.py +22 -18
  98. diffusers/models/unets/unet_kandinsky3.py +2 -2
  99. diffusers/models/unets/unet_motion_model.py +1413 -89
  100. diffusers/models/unets/unet_spatio_temporal_condition.py +40 -16
  101. diffusers/models/unets/unet_stable_cascade.py +19 -18
  102. diffusers/models/unets/uvit_2d.py +2 -2
  103. diffusers/models/upsampling.py +95 -26
  104. diffusers/models/vq_model.py +12 -164
  105. diffusers/optimization.py +1 -1
  106. diffusers/pipelines/__init__.py +202 -3
  107. diffusers/pipelines/allegro/__init__.py +48 -0
  108. diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
  109. diffusers/pipelines/allegro/pipeline_output.py +23 -0
  110. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  111. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  112. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  113. diffusers/pipelines/animatediff/__init__.py +8 -0
  114. diffusers/pipelines/animatediff/pipeline_animatediff.py +122 -109
  115. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +1106 -0
  116. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1288 -0
  117. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1010 -0
  118. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +236 -180
  119. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
  120. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  121. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  122. diffusers/pipelines/audioldm2/modeling_audioldm2.py +58 -39
  123. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +121 -36
  124. diffusers/pipelines/aura_flow/__init__.py +48 -0
  125. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +584 -0
  126. diffusers/pipelines/auto_pipeline.py +196 -28
  127. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  128. diffusers/pipelines/blip_diffusion/modeling_blip2.py +6 -6
  129. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  130. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  131. diffusers/pipelines/cogvideo/__init__.py +54 -0
  132. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +772 -0
  133. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +825 -0
  134. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +885 -0
  135. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +851 -0
  136. diffusers/pipelines/cogvideo/pipeline_output.py +20 -0
  137. diffusers/pipelines/cogview3/__init__.py +47 -0
  138. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
  139. diffusers/pipelines/cogview3/pipeline_output.py +21 -0
  140. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +6 -6
  141. diffusers/pipelines/controlnet/__init__.py +86 -80
  142. diffusers/pipelines/controlnet/multicontrolnet.py +7 -182
  143. diffusers/pipelines/controlnet/pipeline_controlnet.py +134 -87
  144. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  145. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +93 -77
  146. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +88 -197
  147. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +136 -90
  148. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +176 -80
  149. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +125 -89
  150. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
  151. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
  152. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
  153. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
  154. diffusers/pipelines/controlnet_hunyuandit/__init__.py +48 -0
  155. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +1060 -0
  156. diffusers/pipelines/controlnet_sd3/__init__.py +57 -0
  157. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +1133 -0
  158. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
  159. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  160. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +916 -0
  161. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1111 -0
  162. diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
  163. diffusers/pipelines/deepfloyd_if/pipeline_if.py +16 -30
  164. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +20 -35
  165. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +23 -41
  166. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +22 -38
  167. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +25 -41
  168. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +19 -34
  169. diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
  170. diffusers/pipelines/deepfloyd_if/watermark.py +1 -1
  171. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  172. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +70 -30
  173. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +48 -25
  174. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  175. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  176. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +21 -20
  177. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +27 -29
  178. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +33 -27
  179. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +33 -23
  180. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +36 -30
  181. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +102 -69
  182. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  183. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  184. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  185. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  186. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  187. diffusers/pipelines/dit/pipeline_dit.py +7 -4
  188. diffusers/pipelines/flux/__init__.py +69 -0
  189. diffusers/pipelines/flux/modeling_flux.py +47 -0
  190. diffusers/pipelines/flux/pipeline_flux.py +957 -0
  191. diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
  192. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
  193. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
  194. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1006 -0
  195. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +998 -0
  196. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1204 -0
  197. diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
  198. diffusers/pipelines/flux/pipeline_flux_img2img.py +856 -0
  199. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1022 -0
  200. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
  201. diffusers/pipelines/flux/pipeline_output.py +37 -0
  202. diffusers/pipelines/free_init_utils.py +41 -38
  203. diffusers/pipelines/free_noise_utils.py +596 -0
  204. diffusers/pipelines/hunyuan_video/__init__.py +48 -0
  205. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
  206. diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
  207. diffusers/pipelines/hunyuandit/__init__.py +48 -0
  208. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +916 -0
  209. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  210. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  211. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +32 -29
  212. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  213. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  214. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  215. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  216. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +34 -31
  217. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  218. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  219. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  220. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  221. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  222. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  223. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  224. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +22 -35
  225. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +26 -37
  226. diffusers/pipelines/kolors/__init__.py +54 -0
  227. diffusers/pipelines/kolors/pipeline_kolors.py +1070 -0
  228. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +1250 -0
  229. diffusers/pipelines/kolors/pipeline_output.py +21 -0
  230. diffusers/pipelines/kolors/text_encoder.py +889 -0
  231. diffusers/pipelines/kolors/tokenizer.py +338 -0
  232. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +82 -62
  233. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +77 -60
  234. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +12 -12
  235. diffusers/pipelines/latte/__init__.py +48 -0
  236. diffusers/pipelines/latte/pipeline_latte.py +881 -0
  237. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +80 -74
  238. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +85 -76
  239. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  240. diffusers/pipelines/ltx/__init__.py +50 -0
  241. diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
  242. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
  243. diffusers/pipelines/ltx/pipeline_output.py +20 -0
  244. diffusers/pipelines/lumina/__init__.py +48 -0
  245. diffusers/pipelines/lumina/pipeline_lumina.py +890 -0
  246. diffusers/pipelines/marigold/__init__.py +50 -0
  247. diffusers/pipelines/marigold/marigold_image_processing.py +576 -0
  248. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  249. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  250. diffusers/pipelines/mochi/__init__.py +48 -0
  251. diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
  252. diffusers/pipelines/mochi/pipeline_output.py +20 -0
  253. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  254. diffusers/pipelines/pag/__init__.py +80 -0
  255. diffusers/pipelines/pag/pag_utils.py +243 -0
  256. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1328 -0
  257. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1543 -0
  258. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1610 -0
  259. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1683 -0
  260. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +969 -0
  261. diffusers/pipelines/pag/pipeline_pag_kolors.py +1136 -0
  262. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +865 -0
  263. diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
  264. diffusers/pipelines/pag/pipeline_pag_sd.py +1062 -0
  265. diffusers/pipelines/pag/pipeline_pag_sd_3.py +994 -0
  266. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
  267. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +866 -0
  268. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1094 -0
  269. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
  270. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +1345 -0
  271. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +1544 -0
  272. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +1776 -0
  273. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  274. diffusers/pipelines/pia/pipeline_pia.py +74 -164
  275. diffusers/pipelines/pipeline_flax_utils.py +5 -10
  276. diffusers/pipelines/pipeline_loading_utils.py +515 -53
  277. diffusers/pipelines/pipeline_utils.py +411 -222
  278. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  279. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +76 -93
  280. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +873 -0
  281. diffusers/pipelines/sana/__init__.py +47 -0
  282. diffusers/pipelines/sana/pipeline_output.py +21 -0
  283. diffusers/pipelines/sana/pipeline_sana.py +884 -0
  284. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +27 -23
  285. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  286. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  287. diffusers/pipelines/shap_e/renderer.py +1 -1
  288. diffusers/pipelines/stable_audio/__init__.py +50 -0
  289. diffusers/pipelines/stable_audio/modeling_stable_audio.py +158 -0
  290. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +756 -0
  291. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +71 -25
  292. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  293. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +35 -34
  294. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  295. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +20 -11
  296. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  297. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  298. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  299. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +145 -79
  300. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +43 -28
  301. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  302. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +100 -68
  303. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +109 -201
  304. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +131 -32
  305. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +247 -87
  306. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +30 -29
  307. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +35 -27
  308. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +49 -42
  309. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  310. diffusers/pipelines/stable_diffusion_3/__init__.py +54 -0
  311. diffusers/pipelines/stable_diffusion_3/pipeline_output.py +21 -0
  312. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +1140 -0
  313. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +1036 -0
  314. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +1250 -0
  315. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +29 -20
  316. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +59 -58
  317. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +31 -25
  318. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +38 -22
  319. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -24
  320. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -23
  321. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +107 -67
  322. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +316 -69
  323. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  324. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  325. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +98 -30
  326. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +121 -83
  327. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +161 -105
  328. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +142 -218
  329. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -29
  330. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  331. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  332. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +69 -39
  333. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +105 -74
  334. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  335. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +29 -49
  336. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +32 -93
  337. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +37 -25
  338. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +54 -40
  339. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  340. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  341. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  342. diffusers/pipelines/unidiffuser/modeling_uvit.py +12 -12
  343. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +29 -28
  344. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  345. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  346. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +6 -8
  347. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  348. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  349. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +15 -14
  350. diffusers/{models/dual_transformer_2d.py → quantizers/__init__.py} +2 -6
  351. diffusers/quantizers/auto.py +139 -0
  352. diffusers/quantizers/base.py +233 -0
  353. diffusers/quantizers/bitsandbytes/__init__.py +2 -0
  354. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +561 -0
  355. diffusers/quantizers/bitsandbytes/utils.py +306 -0
  356. diffusers/quantizers/gguf/__init__.py +1 -0
  357. diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
  358. diffusers/quantizers/gguf/utils.py +456 -0
  359. diffusers/quantizers/quantization_config.py +669 -0
  360. diffusers/quantizers/torchao/__init__.py +15 -0
  361. diffusers/quantizers/torchao/torchao_quantizer.py +292 -0
  362. diffusers/schedulers/__init__.py +12 -2
  363. diffusers/schedulers/deprecated/__init__.py +1 -1
  364. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  365. diffusers/schedulers/scheduling_amused.py +5 -5
  366. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  367. diffusers/schedulers/scheduling_consistency_models.py +23 -25
  368. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +572 -0
  369. diffusers/schedulers/scheduling_ddim.py +27 -26
  370. diffusers/schedulers/scheduling_ddim_cogvideox.py +452 -0
  371. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  372. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  373. diffusers/schedulers/scheduling_ddim_parallel.py +32 -31
  374. diffusers/schedulers/scheduling_ddpm.py +27 -30
  375. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  376. diffusers/schedulers/scheduling_ddpm_parallel.py +33 -36
  377. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  378. diffusers/schedulers/scheduling_deis_multistep.py +150 -50
  379. diffusers/schedulers/scheduling_dpm_cogvideox.py +489 -0
  380. diffusers/schedulers/scheduling_dpmsolver_multistep.py +221 -84
  381. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  382. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +158 -52
  383. diffusers/schedulers/scheduling_dpmsolver_sde.py +153 -34
  384. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +275 -86
  385. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +81 -57
  386. diffusers/schedulers/scheduling_edm_euler.py +62 -39
  387. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +30 -29
  388. diffusers/schedulers/scheduling_euler_discrete.py +255 -74
  389. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +458 -0
  390. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +320 -0
  391. diffusers/schedulers/scheduling_heun_discrete.py +174 -46
  392. diffusers/schedulers/scheduling_ipndm.py +9 -9
  393. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +138 -29
  394. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +132 -26
  395. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  396. diffusers/schedulers/scheduling_lcm.py +23 -29
  397. diffusers/schedulers/scheduling_lms_discrete.py +105 -28
  398. diffusers/schedulers/scheduling_pndm.py +20 -20
  399. diffusers/schedulers/scheduling_repaint.py +21 -21
  400. diffusers/schedulers/scheduling_sasolver.py +157 -60
  401. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  402. diffusers/schedulers/scheduling_tcd.py +41 -36
  403. diffusers/schedulers/scheduling_unclip.py +19 -16
  404. diffusers/schedulers/scheduling_unipc_multistep.py +243 -47
  405. diffusers/schedulers/scheduling_utils.py +12 -5
  406. diffusers/schedulers/scheduling_utils_flax.py +1 -3
  407. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  408. diffusers/training_utils.py +214 -30
  409. diffusers/utils/__init__.py +17 -1
  410. diffusers/utils/constants.py +3 -0
  411. diffusers/utils/doc_utils.py +1 -0
  412. diffusers/utils/dummy_pt_objects.py +592 -7
  413. diffusers/utils/dummy_torch_and_torchsde_objects.py +15 -0
  414. diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py +47 -0
  415. diffusers/utils/dummy_torch_and_transformers_objects.py +1001 -71
  416. diffusers/utils/dynamic_modules_utils.py +34 -29
  417. diffusers/utils/export_utils.py +50 -6
  418. diffusers/utils/hub_utils.py +131 -17
  419. diffusers/utils/import_utils.py +210 -8
  420. diffusers/utils/loading_utils.py +118 -5
  421. diffusers/utils/logging.py +4 -2
  422. diffusers/utils/peft_utils.py +37 -7
  423. diffusers/utils/state_dict_utils.py +13 -2
  424. diffusers/utils/testing_utils.py +193 -11
  425. diffusers/utils/torch_utils.py +4 -0
  426. diffusers/video_processor.py +113 -0
  427. {diffusers-0.27.0.dist-info → diffusers-0.32.2.dist-info}/METADATA +82 -91
  428. diffusers-0.32.2.dist-info/RECORD +550 -0
  429. {diffusers-0.27.0.dist-info → diffusers-0.32.2.dist-info}/WHEEL +1 -1
  430. diffusers/loaders/autoencoder.py +0 -146
  431. diffusers/loaders/controlnet.py +0 -136
  432. diffusers/loaders/lora.py +0 -1349
  433. diffusers/models/prior_transformer.py +0 -12
  434. diffusers/models/t5_film_transformer.py +0 -70
  435. diffusers/models/transformer_2d.py +0 -25
  436. diffusers/models/transformer_temporal.py +0 -34
  437. diffusers/models/unet_1d.py +0 -26
  438. diffusers/models/unet_1d_blocks.py +0 -203
  439. diffusers/models/unet_2d.py +0 -27
  440. diffusers/models/unet_2d_blocks.py +0 -375
  441. diffusers/models/unet_2d_condition.py +0 -25
  442. diffusers-0.27.0.dist-info/RECORD +0 -399
  443. {diffusers-0.27.0.dist-info → diffusers-0.32.2.dist-info}/LICENSE +0 -0
  444. {diffusers-0.27.0.dist-info → diffusers-0.32.2.dist-info}/entry_points.txt +0 -0
  445. {diffusers-0.27.0.dist-info → diffusers-0.32.2.dist-info}/top_level.txt +0 -0
diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py

@@ -266,7 +266,7 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
             and not isinstance(image, list)
         ):
             raise ValueError(
-                "`image` has to be of type `torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
+                "`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
                 f" {type(image)}"
             )
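Across this release, annotations and docstrings migrate from `torch.FloatTensor` to `torch.Tensor`. The old name only describes a float32 tensor on the CPU, so it never accurately covered fp16 or CUDA inputs; `torch.Tensor` is dtype- and device-agnostic. A quick illustration:

```python
import torch

x = torch.randn(2, 2, dtype=torch.float16)
print(isinstance(x, torch.FloatTensor))  # False: FloatTensor means float32 on the CPU
print(isinstance(x, torch.Tensor))       # True for any dtype and any device
```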
@@ -283,7 +283,12 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
     def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
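The `int(...)` coercion guards against callers passing `height`/`width` as floats (for example, a resolution computed by division), which would otherwise propagate a float into the latent shape and fail inside `torch.randn`. A minimal reproduction of the failure mode, assuming a float input:

```python
import torch

vae_scale_factor = 8
height = 512.0  # floats can sneak in, e.g. from a resolution computed by division

# Old behavior: 512.0 // 8 == 64.0, and torch.randn rejects float dimensions.
try:
    torch.randn(1, 4, height // vae_scale_factor, 64)
except TypeError as err:
    print(err)

# New behavior: coerce to int first so the latent shape stays integral.
latents = torch.randn(1, 4, int(height) // vae_scale_factor, 64)
print(latents.shape)  # torch.Size([1, 4, 64, 64])
```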
@@ -388,9 +393,9 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
     @torch.no_grad()
     def __call__(
         self,
-        example_image: Union[torch.FloatTensor, PIL.Image.Image],
-        image: Union[torch.FloatTensor, PIL.Image.Image],
-        mask_image: Union[torch.FloatTensor, PIL.Image.Image],
+        example_image: Union[torch.Tensor, PIL.Image.Image],
+        image: Union[torch.Tensor, PIL.Image.Image],
+        mask_image: Union[torch.Tensor, PIL.Image.Image],
         height: Optional[int] = None,
         width: Optional[int] = None,
         num_inference_steps: int = 50,
@@ -399,22 +404,22 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
     ):
         r"""
         The call function to the pipeline for generation.

         Args:
-            example_image (`torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
+            example_image (`torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
                 An example image to guide image generation.
-            image (`torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
+            image (`torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
                 `Image` or tensor representing an image batch to be inpainted (parts of the image are masked out with
                 `mask_image` and repainted according to `prompt`).
-            mask_image (`torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
+            mask_image (`torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
                 `Image` or tensor representing an image batch to mask `image`. White pixels in the mask are repainted,
                 while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a single channel
                 (luminance) before use. If it's a tensor, it should contain one color channel (L) instead of 3, so the
@@ -440,7 +445,7 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
@@ -451,7 +456,7 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.
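For orientation, a minimal usage sketch of this pipeline with the widened types. This is hedged: the checkpoint id `Fantasy-Studio/Paint-by-Example` follows the diffusers documentation, and the `"..."` image URLs are placeholders, not taken from this diff:

```python
import torch
from diffusers import PaintByExamplePipeline
from diffusers.utils import load_image

pipe = PaintByExamplePipeline.from_pretrained(
    "Fantasy-Studio/Paint-by-Example", torch_dtype=torch.float16
).to("cuda")

init_image = load_image("...")     # image to inpaint (placeholder URL)
mask_image = load_image("...")     # white pixels are repainted, black pixels kept
example_image = load_image("...")  # reference image that guides the fill

# Any torch.Tensor (not just float32 on CPU) is now a valid input for these arguments.
result = pipe(image=init_image, example_image=example_image, mask_image=mask_image).images[0]
```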
diffusers/pipelines/pia/pipeline_pia.py

@@ -13,18 +13,16 @@
 # limitations under the License.

 import inspect
-import math
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Union

 import numpy as np
 import PIL
 import torch
-import torch.fft as fft
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

-from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from ...image_processor import PipelineImageInput
+from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...models.unets.unet_motion_model import MotionAdapter
@@ -45,6 +43,7 @@ from ...utils import (
     unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
+from ...video_processor import VideoProcessor
 from ..free_init_utils import FreeInitMixin
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin

@@ -55,21 +54,21 @@ EXAMPLE_DOC_STRING = """
     Examples:
         ```py
        >>> import torch
-        >>> from diffusers import (
-        ...     EulerDiscreteScheduler,
-        ...     MotionAdapter,
-        ...     PIAPipeline,
-        ... )
+        >>> from diffusers import EulerDiscreteScheduler, MotionAdapter, PIAPipeline
         >>> from diffusers.utils import export_to_gif, load_image
-        >>> adapter = MotionAdapter.from_pretrained("../checkpoints/pia-diffusers")
-        >>> pipe = PIAPipeline.from_pretrained("SG161222/Realistic_Vision_V6.0_B1_noVAE", motion_adapter=adapter)
+
+        >>> adapter = MotionAdapter.from_pretrained("openmmlab/PIA-condition-adapter")
+        >>> pipe = PIAPipeline.from_pretrained(
+        ...     "SG161222/Realistic_Vision_V6.0_B1_noVAE", motion_adapter=adapter, torch_dtype=torch.float16
+        ... )
+
         >>> pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
         >>> image = load_image(
         ...     "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/pix2pix/cat_6.png?download=true"
         ... )
         >>> image = image.resize((512, 512))
         >>> prompt = "cat in a hat"
-        >>> negative_prompt = "wrong white balance, dark, sketches,worst quality,low quality, deformed, distorted, disfigured, bad eyes, wrong lips,weird mouth, bad teeth, mutated hands and fingers, bad anatomy,wrong anatomy, amputation, extra limb, missing limb, floating,limbs, disconnected limbs, mutation, ugly, disgusting, bad_pictures, negative_hand-neg"
+        >>> negative_prompt = "wrong white balance, dark, sketches, worst quality, low quality, deformed, distorted"
         >>> generator = torch.Generator("cpu").manual_seed(0)
         >>> output = pipe(image=image, prompt=prompt, negative_prompt=negative_prompt, generator=generator)
         >>> frames = output.frames[0]
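The docstring example ends here with the denoised frames; in the library the example presumably continues by writing them out with `export_to_gif` from `diffusers.utils` (a hedged sketch, with `frames` taken from `output.frames[0]` above):

```python
from diffusers.utils import export_to_gif

# `frames` is a list of PIL images making up one video.
export_to_gif(frames, "pia-animation.gif")
```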
@@ -90,28 +89,6 @@ RANGE_LIST = [
 ]


-# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.tensor2vid
-def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: str = "np"):
-    batch_size, channels, num_frames, height, width = video.shape
-    outputs = []
-    for batch_idx in range(batch_size):
-        batch_vid = video[batch_idx].permute(1, 0, 2, 3)
-        batch_output = processor.postprocess(batch_vid, output_type)
-
-        outputs.append(batch_output)
-
-    if output_type == "np":
-        outputs = np.stack(outputs)
-
-    elif output_type == "pt":
-        outputs = torch.stack(outputs)
-
-    elif not output_type == "pil":
-        raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil']")
-
-    return outputs
-
-
 def prepare_mask_coef_by_statistics(num_frames: int, cond_frame: int, motion_scale: int):
     assert num_frames > 0, "video_length should be greater than 0"

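The module-level `tensor2vid` helper disappears because the new `VideoProcessor` class (`diffusers/video_processor.py`, +113 lines in this diff) absorbs that logic. A hedged sketch of the replacement call, with an assumed output layout:

```python
import torch
from diffusers.video_processor import VideoProcessor

video_processor = VideoProcessor(do_resize=False, vae_scale_factor=8)

# Decoded latents arrive as (batch_size, channels, num_frames, height, width).
video_tensor = torch.rand(1, 3, 16, 64, 64)
frames = video_processor.postprocess_video(video=video_tensor, output_type="np")
print(frames.shape)  # assumed NumPy layout: (batch_size, num_frames, height, width, channels)
```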
@@ -130,71 +107,6 @@ def prepare_mask_coef_by_statistics(num_frames: int, cond_frame: int, motion_sca
     return coef


-def _get_freeinit_freq_filter(
-    shape: Tuple[int, ...],
-    device: Union[str, torch.dtype],
-    filter_type: str,
-    order: float,
-    spatial_stop_frequency: float,
-    temporal_stop_frequency: float,
-) -> torch.Tensor:
-    r"""Returns the FreeInit filter based on filter type and other input conditions."""
-
-    time, height, width = shape[-3], shape[-2], shape[-1]
-    mask = torch.zeros(shape)
-
-    if spatial_stop_frequency == 0 or temporal_stop_frequency == 0:
-        return mask
-
-    if filter_type == "butterworth":
-
-        def retrieve_mask(x):
-            return 1 / (1 + (x / spatial_stop_frequency**2) ** order)
-    elif filter_type == "gaussian":
-
-        def retrieve_mask(x):
-            return math.exp(-1 / (2 * spatial_stop_frequency**2) * x)
-    elif filter_type == "ideal":
-
-        def retrieve_mask(x):
-            return 1 if x <= spatial_stop_frequency * 2 else 0
-    else:
-        raise NotImplementedError("`filter_type` must be one of gaussian, butterworth or ideal")
-
-    for t in range(time):
-        for h in range(height):
-            for w in range(width):
-                d_square = (
-                    ((spatial_stop_frequency / temporal_stop_frequency) * (2 * t / time - 1)) ** 2
-                    + (2 * h / height - 1) ** 2
-                    + (2 * w / width - 1) ** 2
-                )
-                mask[..., t, h, w] = retrieve_mask(d_square)
-
-    return mask.to(device)
-
-
-def _freq_mix_3d(x: torch.Tensor, noise: torch.Tensor, LPF: torch.Tensor) -> torch.Tensor:
-    r"""Noise reinitialization."""
-    # FFT
-    x_freq = fft.fftn(x, dim=(-3, -2, -1))
-    x_freq = fft.fftshift(x_freq, dim=(-3, -2, -1))
-    noise_freq = fft.fftn(noise, dim=(-3, -2, -1))
-    noise_freq = fft.fftshift(noise_freq, dim=(-3, -2, -1))
-
-    # frequency mix
-    HPF = 1 - LPF
-    x_freq_low = x_freq * LPF
-    noise_freq_high = noise_freq * HPF
-    x_freq_mixed = x_freq_low + noise_freq_high  # mix in freq domain
-
-    # IFFT
-    x_freq_mixed = fft.ifftshift(x_freq_mixed, dim=(-3, -2, -1))
-    x_mixed = fft.ifftn(x_freq_mixed, dim=(-3, -2, -1)).real
-
-    return x_mixed
-
-
 @dataclass
 class PIAPipelineOutput(BaseOutput):
     r"""
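These private FreeInit helpers leave `pipeline_pia.py` because the canonical implementations live in `diffusers/pipelines/free_init_utils.py` (touched +41/-38 in this diff), which `PIAPipeline` already inherits via `FreeInitMixin`. Under that assumption, FreeInit is driven through the mixin's public toggles, reusing `pipe`, `image`, `prompt`, and `generator` from the docstring example above; the keyword names here mirror the removed helper's parameters and are a hedged sketch, not a verbatim API listing:

```python
# Hedged sketch: enable FreeInit noise reinitialization through the mixin.
pipe.enable_free_init(
    num_iters=3,
    method="butterworth",  # or "gaussian" / "ideal"
    order=4,
    spatial_stop_frequency=0.25,
    temporal_stop_frequency=0.25,
)
output = pipe(image=image, prompt=prompt, generator=generator)
pipe.disable_free_init()  # restore the ordinary sampling loop
```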
@@ -202,9 +114,9 @@ class PIAPipelineOutput(BaseOutput):

     Args:
         frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
-            Nested list of length `batch_size` with denoised PIL image sequences of length `num_frames`,
-            NumPy array of shape `(batch_size, num_frames, channels, height, width,
-            Torch tensor of shape `(batch_size, num_frames, channels, height, width)`.
+            Nested list of length `batch_size` with denoised PIL image sequences of length `num_frames`, NumPy array of
+            shape `(batch_size, num_frames, channels, height, width, Torch tensor of shape `(batch_size, num_frames,
+            channels, height, width)`.
     """

     frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
@@ -215,7 +127,7 @@ class PIAPipeline(
     StableDiffusionMixin,
     TextualInversionLoaderMixin,
     IPAdapterMixin,
-    LoraLoaderMixin,
+    StableDiffusionLoraLoaderMixin,
     FromSingleFileMixin,
     FreeInitMixin,
 ):
@@ -227,8 +139,8 @@ class PIAPipeline(

     The pipeline also inherits the following loading methods:
         - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
-        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
-        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
         - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters

     Args:
@@ -284,7 +196,7 @@ class PIAPipeline(
             image_encoder=image_encoder,
         )
         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
-        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+        self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt with num_images_per_prompt -> num_videos_per_prompt
     def encode_prompt(
@@ -294,8 +206,8 @@ class PIAPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -315,10 +227,10 @@ class PIAPipeline(
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -330,7 +242,7 @@ class PIAPipeline(
         """
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
-        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
             self._lora_scale = lora_scale

             # dynamically adjust the LoRA scale
@@ -462,9 +374,10 @@ class PIAPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds

@@ -591,6 +504,9 @@ class PIAPipeline(
     def prepare_ip_adapter_image_embeds(
         self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
     ):
+        image_embeds = []
+        if do_classifier_free_guidance:
+            negative_image_embeds = []
         if ip_adapter_image_embeds is None:
             if not isinstance(ip_adapter_image, list):
                 ip_adapter_image = [ip_adapter_image]
@@ -600,7 +516,6 @@ class PIAPipeline(
                     f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
                 )

-            image_embeds = []
             for single_ip_adapter_image, image_proj_layer in zip(
                 ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
             ):
@@ -608,36 +523,28 @@ class PIAPipeline(
                 single_image_embeds, single_negative_image_embeds = self.encode_image(
                     single_ip_adapter_image, device, 1, output_hidden_state
                 )
-                single_image_embeds = torch.stack([single_image_embeds] * num_images_per_prompt, dim=0)
-                single_negative_image_embeds = torch.stack(
-                    [single_negative_image_embeds] * num_images_per_prompt, dim=0
-                )

+                image_embeds.append(single_image_embeds[None, :])
                 if do_classifier_free_guidance:
-                    single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
-                    single_image_embeds = single_image_embeds.to(device)
-
-                image_embeds.append(single_image_embeds)
+                    negative_image_embeds.append(single_negative_image_embeds[None, :])
         else:
-            repeat_dims = [1]
-            image_embeds = []
             for single_image_embeds in ip_adapter_image_embeds:
                 if do_classifier_free_guidance:
                     single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
-                    single_image_embeds = single_image_embeds.repeat(
-                        num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
-                    )
-                    single_negative_image_embeds = single_negative_image_embeds.repeat(
-                        num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
-                    )
-                    single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
-                else:
-                    single_image_embeds = single_image_embeds.repeat(
-                        num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
-                    )
+                    negative_image_embeds.append(single_negative_image_embeds)
                 image_embeds.append(single_image_embeds)

-        return image_embeds
+        ip_adapter_image_embeds = []
+        for i, single_image_embeds in enumerate(image_embeds):
+            single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
+            if do_classifier_free_guidance:
+                single_negative_image_embeds = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
+                single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds], dim=0)
+
+            single_image_embeds = single_image_embeds.to(device=device)
+            ip_adapter_image_embeds.append(single_image_embeds)
+
+        return ip_adapter_image_embeds

     # Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
     def prepare_latents(
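The rewrite splits the work into two passes: first collect each adapter's positive (and, under classifier-free guidance, negative) embeddings, then tile each one `num_images_per_prompt` times and concatenate `[negative, positive]` along the batch dimension. A small self-contained sketch of that final batching step, with made-up shapes:

```python
import torch

num_images_per_prompt = 2
do_classifier_free_guidance = True

# Pretend a single IP-Adapter produced these (1, num_images, embed_dim) tensors.
image_embeds = [torch.randn(1, 4, 768)]
negative_image_embeds = [torch.zeros(1, 4, 768)]

ip_adapter_image_embeds = []
for i, single_image_embeds in enumerate(image_embeds):
    single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
    if do_classifier_free_guidance:
        single_negative = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
        # CFG convention: unconditional embeddings first, conditional second.
        single_image_embeds = torch.cat([single_negative, single_image_embeds], dim=0)
    ip_adapter_image_embeds.append(single_image_embeds)

print(ip_adapter_image_embeds[0].shape)  # torch.Size([4, 4, 768])
```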
@@ -687,7 +594,7 @@ class PIAPipeline(
         )
         _, _, _, scaled_height, scaled_width = shape

-        image = self.image_processor.preprocess(image)
+        image = self.video_processor.preprocess(image)
         image = image.to(device, dtype)

         if isinstance(generator, list):
@@ -767,11 +674,11 @@ class PIAPipeline(
         num_videos_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
         motion_scale: int = 0,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
@@ -788,7 +695,8 @@ class PIAPipeline(
                 The input image to be used for video generation.
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
-            strength (`float`, *optional*, defaults to 1.0): Indicates extent to transform the reference `image`. Must be between 0 and 1.
+            strength (`float`, *optional*, defaults to 1.0):
+                Indicates extent to transform the reference `image`. Must be between 0 and 1.
             height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                 The height in pixels of the generated video.
             width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
@@ -811,33 +719,31 @@ class PIAPipeline(
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`. Latents should be of shape
                 `(batch_size, num_channel, num_frames, height, width)`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             ip_adapter_image: (`PipelineImageInput`, *optional*):
                 Optional image input to work with IP Adapters.
-            ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
-                Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
-                if `do_classifier_free_guidance` is set to `True`.
-                If not provided, embeddings are computed from the `ip_adapter_image` input argument.
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
+                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
             motion_scale: (`int`, *optional*, defaults to 0):
-                Parameter that controls the amount and type of motion that is added to the image. Increasing the value increases the amount of motion, while specific
-                ranges of values control the type of motion that is added. Must be between 0 and 8.
-                Set between 0-2 to only increase the amount of motion.
-                Set between 3-5 to create looping motion.
-                Set between 6-8 to perform motion with image style transfer.
+                Parameter that controls the amount and type of motion that is added to the image. Increasing the value
+                increases the amount of motion, while specific ranges of values control the type of motion that is
+                added. Must be between 0 and 8. Set between 0-2 to only increase the amount of motion. Set between 3-5
+                to create looping motion. Set between 6-8 to perform motion with image style transfer.
             output_type (`str`, *optional*, defaults to `"pil"`):
-                The output format of the generated video. Choose between `torch.FloatTensor`, `PIL.Image` or
-                `np.array`.
+                The output format of the generated video. Choose between `torch.Tensor`, `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] instead
                 of a plain tuple.
@@ -855,14 +761,14 @@ class PIAPipeline(
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.

         Examples:

         Returns:
             [`~pipelines.pia.pipeline_pia.PIAPipelineOutput`] or `tuple`:
-                If `return_dict` is `True`, [`~pipelines.pia.pipeline_pia.PIAPipelineOutput`] is
-                returned, otherwise a `tuple` is returned where the first element is a list with the generated frames.
+                If `return_dict` is `True`, [`~pipelines.pia.pipeline_pia.PIAPipelineOutput`] is returned, otherwise a
+                `tuple` is returned where the first element is a list with the generated frames.
         """
         # 0. Default height and width to unet
         height = height or self.unet.config.sample_size * self.vae_scale_factor
@@ -918,6 +824,8 @@ class PIAPipeline(
         if self.do_classifier_free_guidance:
             prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])

+        prompt_embeds = prompt_embeds.repeat_interleave(repeats=num_frames, dim=0)
+
         if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
             image_embeds = self.prepare_ip_adapter_image_embeds(
                 ip_adapter_image,
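The new `repeat_interleave` line duplicates the text embeddings once per frame, presumably so downstream attention sees frames folded into the batch dimension (an assumption based on the motion-UNet layout, not spelled out in this diff). The call itself behaves like this:

```python
import torch

prompt_embeds = torch.randn(2, 77, 768)  # (batch, sequence, dim), e.g. [negative, positive]
num_frames = 16

per_frame = prompt_embeds.repeat_interleave(repeats=num_frames, dim=0)
# torch.Size([32, 77, 768]); input row i occupies output rows i*16 .. i*16+15
print(per_frame.shape)
```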
@@ -979,8 +887,10 @@ class PIAPipeline(
                     latents, free_init_iter, num_inference_steps, device, latents.dtype, generator
                 )

+            self._num_timesteps = len(timesteps)
             num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
-            with self.progress_bar(total=num_inference_steps) as progress_bar:
+
+            with self.progress_bar(total=self._num_timesteps) as progress_bar:
                 for i, t in enumerate(timesteps):
                     # expand the latents if we are doing classifier free guidance
                     latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
@@ -1023,7 +933,7 @@ class PIAPipeline(
             video = latents
         else:
             video_tensor = self.decode_latents(latents)
-            video = tensor2vid(video_tensor, self.image_processor, output_type=output_type)
+            video = self.video_processor.postprocess_video(video=video_tensor, output_type=output_type)

         # 10. Offload all models
         self.maybe_free_model_hooks()
diffusers/pipelines/pipeline_flax_utils.py

@@ -180,7 +180,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):

         if push_to_hub:
             commit_message = kwargs.pop("commit_message", None)
-            private = kwargs.pop("private", False)
+            private = kwargs.pop("private", None)
             create_pr = kwargs.pop("create_pr", False)
             token = kwargs.pop("token", None)
             repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
@@ -254,9 +254,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
-            resume_download (`bool`, *optional*, defaults to `False`):
-                Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-                incompletely downloaded files are deleted.
+
             proxies (`Dict[str, str]`, *optional*):
                 A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -296,7 +294,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
         >>> # see more in [the documentation](https://huggingface.co/docs/hub/security-tokens)
         >>> pipeline, params = FlaxDiffusionPipeline.from_pretrained(
         ...     "runwayml/stable-diffusion-v1-5",
-        ...     revision="bf16",
+        ...     variant="bf16",
         ...     dtype=jnp.bfloat16,
         ... )

@@ -310,13 +308,12 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
         ... )

         >>> dpm_pipe, dpm_params = FlaxStableDiffusionPipeline.from_pretrained(
-        ...     model_id, revision="bf16", dtype=jnp.bfloat16, scheduler=dpmpp
+        ...     model_id, variant="bf16", dtype=jnp.bfloat16, scheduler=dpmpp
         ... )
         >>> dpm_params["scheduler"] = dpmpp_state
         ```
         """
         cache_dir = kwargs.pop("cache_dir", None)
-        resume_download = kwargs.pop("resume_download", False)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", False)
         token = kwargs.pop("token", None)
@@ -332,7 +329,6 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
         config_dict = cls.load_config(
             pretrained_model_name_or_path,
             cache_dir=cache_dir,
-            resume_download=resume_download,
             proxies=proxies,
             local_files_only=local_files_only,
             token=token,
@@ -363,7 +359,6 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
         cached_folder = snapshot_download(
             pretrained_model_name_or_path,
             cache_dir=cache_dir,
-            resume_download=resume_download,
             proxies=proxies,
             local_files_only=local_files_only,
             token=token,
@@ -564,7 +559,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
         ...     )

         >>> text2img = FlaxStableDiffusionPipeline.from_pretrained(
-        ...     "runwayml/stable-diffusion-v1-5", revision="bf16", dtype=jnp.bfloat16
+        ...     "runwayml/stable-diffusion-v1-5", variant="bf16", dtype=jnp.bfloat16
         ... )
         >>> img2img = FlaxStableDiffusionImg2ImgPipeline(**text2img.components)
         ```
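Two housekeeping notes on this file. The `resume_download` kwarg is dropped because recent `huggingface_hub` releases deprecate it and always resume interrupted downloads. And the doc examples switch from `revision="bf16"` to `variant="bf16"`, tracking the Hub's current semantics: `revision` names a git ref (branch, tag, or commit), while `variant` selects alternate weight filenames. A hedged sketch of the updated loading call (the exact variant filename convention for Flax checkpoints is an assumption here):

```python
import jax.numpy as jnp
from diffusers import FlaxStableDiffusionPipeline

# `variant="bf16"` picks up bf16-suffixed weight files from the main branch,
# whereas `revision` would instead check out a whole git branch or tag.
pipeline, params = FlaxStableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", variant="bf16", dtype=jnp.bfloat16
)
```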