diffusers-0.26.3-py3-none-any.whl → diffusers-0.27.0-py3-none-any.whl

Files changed (299)
  1. diffusers/__init__.py +20 -1
  2. diffusers/commands/__init__.py +1 -1
  3. diffusers/commands/diffusers_cli.py +1 -1
  4. diffusers/commands/env.py +1 -1
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/configuration_utils.py +7 -3
  7. diffusers/dependency_versions_check.py +1 -1
  8. diffusers/dependency_versions_table.py +2 -2
  9. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  10. diffusers/image_processor.py +110 -4
  11. diffusers/loaders/autoencoder.py +7 -8
  12. diffusers/loaders/controlnet.py +17 -8
  13. diffusers/loaders/ip_adapter.py +86 -23
  14. diffusers/loaders/lora.py +105 -310
  15. diffusers/loaders/lora_conversion_utils.py +1 -1
  16. diffusers/loaders/peft.py +1 -1
  17. diffusers/loaders/single_file.py +51 -12
  18. diffusers/loaders/single_file_utils.py +274 -49
  19. diffusers/loaders/textual_inversion.py +23 -4
  20. diffusers/loaders/unet.py +195 -41
  21. diffusers/loaders/utils.py +1 -1
  22. diffusers/models/__init__.py +3 -1
  23. diffusers/models/activations.py +9 -9
  24. diffusers/models/attention.py +26 -36
  25. diffusers/models/attention_flax.py +1 -1
  26. diffusers/models/attention_processor.py +171 -114
  27. diffusers/models/autoencoders/autoencoder_asym_kl.py +1 -1
  28. diffusers/models/autoencoders/autoencoder_kl.py +3 -1
  29. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +1 -1
  30. diffusers/models/autoencoders/autoencoder_tiny.py +4 -2
  31. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  32. diffusers/models/autoencoders/vae.py +1 -1
  33. diffusers/models/controlnet.py +1 -1
  34. diffusers/models/controlnet_flax.py +1 -1
  35. diffusers/models/downsampling.py +8 -12
  36. diffusers/models/dual_transformer_2d.py +1 -1
  37. diffusers/models/embeddings.py +3 -4
  38. diffusers/models/embeddings_flax.py +1 -1
  39. diffusers/models/lora.py +33 -10
  40. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  41. diffusers/models/modeling_flax_utils.py +1 -1
  42. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  43. diffusers/models/modeling_utils.py +4 -6
  44. diffusers/models/normalization.py +1 -1
  45. diffusers/models/resnet.py +31 -58
  46. diffusers/models/resnet_flax.py +1 -1
  47. diffusers/models/t5_film_transformer.py +1 -1
  48. diffusers/models/transformer_2d.py +1 -1
  49. diffusers/models/transformer_temporal.py +1 -1
  50. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  51. diffusers/models/transformers/t5_film_transformer.py +1 -1
  52. diffusers/models/transformers/transformer_2d.py +29 -31
  53. diffusers/models/transformers/transformer_temporal.py +1 -1
  54. diffusers/models/unet_1d.py +1 -1
  55. diffusers/models/unet_1d_blocks.py +1 -1
  56. diffusers/models/unet_2d.py +1 -1
  57. diffusers/models/unet_2d_blocks.py +1 -1
  58. diffusers/models/unet_2d_condition.py +1 -1
  59. diffusers/models/unets/__init__.py +1 -0
  60. diffusers/models/unets/unet_1d.py +1 -1
  61. diffusers/models/unets/unet_1d_blocks.py +1 -1
  62. diffusers/models/unets/unet_2d.py +4 -4
  63. diffusers/models/unets/unet_2d_blocks.py +238 -98
  64. diffusers/models/unets/unet_2d_blocks_flax.py +1 -1
  65. diffusers/models/unets/unet_2d_condition.py +420 -323
  66. diffusers/models/unets/unet_2d_condition_flax.py +21 -12
  67. diffusers/models/unets/unet_3d_blocks.py +50 -40
  68. diffusers/models/unets/unet_3d_condition.py +47 -8
  69. diffusers/models/unets/unet_i2vgen_xl.py +75 -30
  70. diffusers/models/unets/unet_kandinsky3.py +1 -1
  71. diffusers/models/unets/unet_motion_model.py +48 -8
  72. diffusers/models/unets/unet_spatio_temporal_condition.py +1 -1
  73. diffusers/models/unets/unet_stable_cascade.py +610 -0
  74. diffusers/models/unets/uvit_2d.py +1 -1
  75. diffusers/models/upsampling.py +10 -16
  76. diffusers/models/vae_flax.py +1 -1
  77. diffusers/models/vq_model.py +1 -1
  78. diffusers/optimization.py +1 -1
  79. diffusers/pipelines/__init__.py +26 -0
  80. diffusers/pipelines/amused/pipeline_amused.py +1 -1
  81. diffusers/pipelines/amused/pipeline_amused_img2img.py +1 -1
  82. diffusers/pipelines/amused/pipeline_amused_inpaint.py +1 -1
  83. diffusers/pipelines/animatediff/pipeline_animatediff.py +162 -417
  84. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +165 -137
  85. diffusers/pipelines/animatediff/pipeline_output.py +7 -6
  86. diffusers/pipelines/audioldm/pipeline_audioldm.py +3 -19
  87. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  88. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +3 -3
  89. diffusers/pipelines/auto_pipeline.py +7 -16
  90. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  91. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  92. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  93. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  94. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  95. diffusers/pipelines/controlnet/pipeline_controlnet.py +90 -90
  96. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  97. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +98 -90
  98. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +92 -90
  99. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +145 -70
  100. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +126 -89
  101. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +108 -96
  102. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
  103. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +1 -1
  104. diffusers/pipelines/ddim/pipeline_ddim.py +1 -1
  105. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  106. diffusers/pipelines/deepfloyd_if/pipeline_if.py +4 -4
  107. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +4 -4
  108. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +5 -5
  109. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +4 -4
  110. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +5 -5
  111. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +5 -5
  112. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +10 -120
  113. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -91
  114. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  115. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +1 -1
  116. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  117. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +1 -1
  118. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  119. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  120. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  121. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  122. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  123. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  124. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +5 -4
  125. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +5 -4
  126. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +7 -22
  127. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -39
  128. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +5 -5
  129. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  130. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +31 -22
  131. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  132. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  133. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -2
  134. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  135. diffusers/pipelines/dit/pipeline_dit.py +1 -1
  136. diffusers/pipelines/free_init_utils.py +184 -0
  137. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +22 -104
  138. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +1 -1
  139. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  140. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +1 -1
  141. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +2 -2
  142. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +1 -1
  143. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +1 -1
  144. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +1 -1
  145. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +1 -1
  146. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
  147. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  148. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +2 -2
  149. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +104 -93
  150. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +112 -74
  151. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  152. diffusers/pipelines/ledits_pp/__init__.py +55 -0
  153. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +1505 -0
  154. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +1797 -0
  155. diffusers/pipelines/ledits_pp/pipeline_output.py +43 -0
  156. diffusers/pipelines/musicldm/pipeline_musicldm.py +3 -19
  157. diffusers/pipelines/onnx_utils.py +1 -1
  158. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  159. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +3 -3
  160. diffusers/pipelines/pia/pipeline_pia.py +168 -327
  161. diffusers/pipelines/pipeline_flax_utils.py +1 -1
  162. diffusers/pipelines/pipeline_loading_utils.py +508 -0
  163. diffusers/pipelines/pipeline_utils.py +188 -534
  164. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +56 -10
  165. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +3 -3
  166. diffusers/pipelines/shap_e/camera.py +1 -1
  167. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  168. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  169. diffusers/pipelines/shap_e/renderer.py +1 -1
  170. diffusers/pipelines/stable_cascade/__init__.py +50 -0
  171. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +482 -0
  172. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +311 -0
  173. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +638 -0
  174. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  175. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +4 -1
  176. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  177. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +2 -2
  178. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  179. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +1 -1
  180. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +1 -1
  181. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -1
  182. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +1 -1
  183. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +90 -146
  184. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -4
  185. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +4 -32
  186. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +92 -119
  187. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +92 -119
  188. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +13 -59
  189. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +3 -31
  190. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -33
  191. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +5 -21
  192. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +7 -21
  193. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  194. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  195. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  196. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +5 -21
  197. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +9 -38
  198. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +5 -34
  199. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +6 -35
  200. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +7 -6
  201. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +4 -124
  202. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +282 -80
  203. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +94 -46
  204. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +3 -3
  205. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  206. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +6 -22
  207. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  208. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +96 -148
  209. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +98 -154
  210. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +98 -153
  211. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +25 -87
  212. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +89 -80
  213. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +5 -49
  214. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +80 -88
  215. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +8 -6
  216. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +15 -86
  217. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +20 -93
  218. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +5 -5
  219. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +3 -19
  220. diffusers/pipelines/unclip/pipeline_unclip.py +1 -1
  221. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +1 -1
  222. diffusers/pipelines/unclip/text_proj.py +1 -1
  223. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +35 -35
  224. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  225. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +4 -21
  226. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +2 -2
  227. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -5
  228. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  229. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +1 -1
  230. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +2 -2
  231. diffusers/schedulers/__init__.py +7 -1
  232. diffusers/schedulers/deprecated/scheduling_karras_ve.py +1 -1
  233. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  234. diffusers/schedulers/scheduling_consistency_models.py +42 -19
  235. diffusers/schedulers/scheduling_ddim.py +2 -4
  236. diffusers/schedulers/scheduling_ddim_flax.py +13 -5
  237. diffusers/schedulers/scheduling_ddim_inverse.py +2 -4
  238. diffusers/schedulers/scheduling_ddim_parallel.py +2 -4
  239. diffusers/schedulers/scheduling_ddpm.py +2 -4
  240. diffusers/schedulers/scheduling_ddpm_flax.py +1 -1
  241. diffusers/schedulers/scheduling_ddpm_parallel.py +2 -4
  242. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +1 -1
  243. diffusers/schedulers/scheduling_deis_multistep.py +46 -19
  244. diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -21
  245. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +1 -1
  246. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +9 -7
  247. diffusers/schedulers/scheduling_dpmsolver_sde.py +35 -35
  248. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +49 -18
  249. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +683 -0
  250. diffusers/schedulers/scheduling_edm_euler.py +381 -0
  251. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +43 -15
  252. diffusers/schedulers/scheduling_euler_discrete.py +42 -17
  253. diffusers/schedulers/scheduling_euler_discrete_flax.py +1 -1
  254. diffusers/schedulers/scheduling_heun_discrete.py +35 -35
  255. diffusers/schedulers/scheduling_ipndm.py +37 -11
  256. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +44 -44
  257. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +44 -44
  258. diffusers/schedulers/scheduling_karras_ve_flax.py +1 -1
  259. diffusers/schedulers/scheduling_lcm.py +38 -14
  260. diffusers/schedulers/scheduling_lms_discrete.py +43 -15
  261. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  262. diffusers/schedulers/scheduling_pndm.py +2 -4
  263. diffusers/schedulers/scheduling_pndm_flax.py +2 -4
  264. diffusers/schedulers/scheduling_repaint.py +1 -1
  265. diffusers/schedulers/scheduling_sasolver.py +41 -9
  266. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  267. diffusers/schedulers/scheduling_sde_ve_flax.py +1 -1
  268. diffusers/schedulers/scheduling_tcd.py +686 -0
  269. diffusers/schedulers/scheduling_unclip.py +1 -1
  270. diffusers/schedulers/scheduling_unipc_multistep.py +46 -19
  271. diffusers/schedulers/scheduling_utils.py +2 -1
  272. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  273. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  274. diffusers/training_utils.py +9 -2
  275. diffusers/utils/__init__.py +2 -1
  276. diffusers/utils/accelerate_utils.py +1 -1
  277. diffusers/utils/constants.py +1 -1
  278. diffusers/utils/doc_utils.py +1 -1
  279. diffusers/utils/dummy_pt_objects.py +60 -0
  280. diffusers/utils/dummy_torch_and_transformers_objects.py +75 -0
  281. diffusers/utils/dynamic_modules_utils.py +1 -1
  282. diffusers/utils/export_utils.py +3 -3
  283. diffusers/utils/hub_utils.py +60 -16
  284. diffusers/utils/import_utils.py +15 -1
  285. diffusers/utils/loading_utils.py +2 -0
  286. diffusers/utils/logging.py +1 -1
  287. diffusers/utils/model_card_template.md +24 -0
  288. diffusers/utils/outputs.py +14 -7
  289. diffusers/utils/peft_utils.py +1 -1
  290. diffusers/utils/state_dict_utils.py +1 -1
  291. diffusers/utils/testing_utils.py +2 -0
  292. diffusers/utils/torch_utils.py +1 -1
  293. {diffusers-0.26.3.dist-info → diffusers-0.27.0.dist-info}/METADATA +46 -46
  294. diffusers-0.27.0.dist-info/RECORD +399 -0
  295. {diffusers-0.26.3.dist-info → diffusers-0.27.0.dist-info}/WHEEL +1 -1
  296. diffusers-0.26.3.dist-info/RECORD +0 -384
  297. {diffusers-0.26.3.dist-info → diffusers-0.27.0.dist-info}/LICENSE +0 -0
  298. {diffusers-0.26.3.dist-info → diffusers-0.27.0.dist-info}/entry_points.txt +0 -0
  299. {diffusers-0.26.3.dist-info → diffusers-0.27.0.dist-info}/top_level.txt +0 -0
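
Among the additions listed above are the Stable Cascade pipelines (entries 170–173) and their UNet (entry 73). A minimal sketch of how the new prior/decoder pair might be wired together is shown below; the checkpoint IDs, dtypes, and step counts are illustrative assumptions, not values taken from this diff.

```python
import torch
from diffusers import StableCascadePriorPipeline, StableCascadeDecoderPipeline

# Sketch only: model IDs and generation settings are assumptions, not part of this diff.
prior = StableCascadePriorPipeline.from_pretrained(
    "stabilityai/stable-cascade-prior", torch_dtype=torch.bfloat16
).to("cuda")
decoder = StableCascadeDecoderPipeline.from_pretrained(
    "stabilityai/stable-cascade", torch_dtype=torch.float16
).to("cuda")

prompt = "an astronaut riding a horse, cinematic lighting"
prior_out = prior(prompt=prompt, num_inference_steps=20, guidance_scale=4.0)
image = decoder(
    image_embeddings=prior_out.image_embeddings.to(torch.float16),
    prompt=prompt,
    num_inference_steps=10,
    guidance_scale=0.0,
).images[0]
image.save("cascade.png")
```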
diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py

@@ -1,4 +1,4 @@
-# Copyright 2023 The HuggingFace Team. All rights reserved.
+# Copyright 2024 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,16 +21,33 @@ import PIL.Image
 import torch
 from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
 
-from ...image_processor import VaeImageProcessor
+from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...models import AutoencoderKLTemporalDecoder, UNetSpatioTemporalConditionModel
 from ...schedulers import EulerDiscreteScheduler
-from ...utils import BaseOutput, logging
+from ...utils import BaseOutput, logging, replace_example_docstring
 from ...utils.torch_utils import is_compiled_module, randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+EXAMPLE_DOC_STRING = """
+    Examples:
+        ```py
+        >>> from diffusers import StableVideoDiffusionPipeline
+        >>> from diffusers.utils import load_image, export_to_video
+
+        >>> pipe = StableVideoDiffusionPipeline.from_pretrained("stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16")
+        >>> pipe.to("cuda")
+
+        >>> image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd-docstring-example.jpeg")
+        >>> image = image.resize((1024, 576))
+
+        >>> frames = pipe(image, num_frames=25, decode_chunk_size=8).frames[0]
+        >>> export_to_video(frames, "generated.mp4", fps=7)
+        ```
+"""
+
 
 def _append_dims(x, target_dims):
     """Appends dimensions to the end of a tensor until it has target_dims dimensions."""
@@ -41,7 +58,7 @@ def _append_dims(x, target_dims):
 
 
 # Copied from diffusers.pipelines.animatediff.pipeline_animatediff.tensor2vid
-def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: str = "np"):
+def tensor2vid(video: torch.Tensor, processor: VaeImageProcessor, output_type: str = "np"):
     batch_size, channels, num_frames, height, width = video.shape
     outputs = []
     for batch_idx in range(batch_size):
@@ -57,7 +74,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type:
         outputs = torch.stack(outputs)
 
     elif not output_type == "pil":
-        raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]")
+        raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil']")
 
     return outputs
 
@@ -65,15 +82,15 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type:
 @dataclass
 class StableVideoDiffusionPipelineOutput(BaseOutput):
     r"""
-    Output class for zero-shot text-to-video pipeline.
+    Output class for Stable Video Diffusion pipeline.
 
     Args:
-        frames (`[List[PIL.Image.Image]`, `np.ndarray`]):
-            List of denoised PIL images of length `batch_size` or NumPy array of shape `(batch_size, height, width,
-            num_channels)`.
+        frames (`[List[List[PIL.Image.Image]]`, `np.ndarray`, `torch.FloatTensor`]):
+            List of denoised PIL images of length `batch_size` or numpy array or torch tensor
+            of shape `(batch_size, num_frames, height, width, num_channels)`.
     """
 
-    frames: Union[List[PIL.Image.Image], np.ndarray]
+    frames: Union[List[List[PIL.Image.Image]], np.ndarray, torch.FloatTensor]
 
 
 class StableVideoDiffusionPipeline(DiffusionPipeline):
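
With the widened `frames` type above, the same call can return nested PIL lists, a NumPy array, or a torch tensor depending on `output_type`. A short hedged sketch, reusing the checkpoint and image URL from the example docstring in this diff; the commented shapes follow the updated docstring and are worth verifying locally.

```python
import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image

pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
).to("cuda")
image = load_image(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd-docstring-example.jpeg"
).resize((1024, 576))

pil_frames = pipe(image).frames                    # List[List[PIL.Image.Image]], one inner list per video
np_frames = pipe(image, output_type="np").frames   # np.ndarray, roughly (batch, num_frames, height, width, 3)
pt_frames = pipe(image, output_type="pt").frames   # torch.FloatTensor (layout may be channels-first; check locally)
```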
@@ -119,7 +136,13 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
 
-    def _encode_image(self, image, device, num_videos_per_prompt, do_classifier_free_guidance):
+    def _encode_image(
+        self,
+        image: PipelineImageInput,
+        device: Union[str, torch.device],
+        num_videos_per_prompt: int,
+        do_classifier_free_guidance: bool,
+    ) -> torch.FloatTensor:
         dtype = next(self.image_encoder.parameters()).dtype
 
         if not isinstance(image, torch.Tensor):
@@ -132,15 +155,15 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
             image = _resize_with_antialiasing(image, (224, 224))
             image = (image + 1.0) / 2.0
 
-        # Normalize the image with for CLIP input
-        image = self.feature_extractor(
-            images=image,
-            do_normalize=True,
-            do_center_crop=False,
-            do_resize=False,
-            do_rescale=False,
-            return_tensors="pt",
-        ).pixel_values
+            # Normalize the image with for CLIP input
+            image = self.feature_extractor(
+                images=image,
+                do_normalize=True,
+                do_center_crop=False,
+                do_resize=False,
+                do_rescale=False,
+                return_tensors="pt",
+            ).pixel_values
 
         image = image.to(device=device, dtype=dtype)
         image_embeddings = self.image_encoder(image).image_embeds
@@ -164,9 +187,9 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
     def _encode_vae_image(
         self,
         image: torch.Tensor,
-        device,
-        num_videos_per_prompt,
-        do_classifier_free_guidance,
+        device: Union[str, torch.device],
+        num_videos_per_prompt: int,
+        do_classifier_free_guidance: bool,
     ):
         image = image.to(device=device)
         image_latents = self.vae.encode(image).latent_dist.mode()
@@ -186,13 +209,13 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
 
     def _get_add_time_ids(
         self,
-        fps,
-        motion_bucket_id,
-        noise_aug_strength,
-        dtype,
-        batch_size,
-        num_videos_per_prompt,
-        do_classifier_free_guidance,
+        fps: int,
+        motion_bucket_id: int,
+        noise_aug_strength: float,
+        dtype: torch.dtype,
+        batch_size: int,
+        num_videos_per_prompt: int,
+        do_classifier_free_guidance: bool,
     ):
         add_time_ids = [fps, motion_bucket_id, noise_aug_strength]
 
@@ -212,7 +235,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
 
         return add_time_ids
 
-    def decode_latents(self, latents, num_frames, decode_chunk_size=14):
+    def decode_latents(self, latents: torch.FloatTensor, num_frames: int, decode_chunk_size: int = 14):
         # [batch, frames, channels, height, width] -> [batch*frames, channels, height, width]
         latents = latents.flatten(0, 1)
 
@@ -257,15 +280,15 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
 
     def prepare_latents(
         self,
-        batch_size,
-        num_frames,
-        num_channels_latents,
-        height,
-        width,
-        dtype,
-        device,
-        generator,
-        latents=None,
+        batch_size: int,
+        num_frames: int,
+        num_channels_latents: int,
+        height: int,
+        width: int,
+        dtype: torch.dtype,
+        device: Union[str, torch.device],
+        generator: torch.Generator,
+        latents: Optional[torch.FloatTensor] = None,
     ):
         shape = (
             batch_size,
@@ -299,7 +322,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
     @property
     def do_classifier_free_guidance(self):
         if isinstance(self.guidance_scale, (int, float)):
-            return self.guidance_scale
+            return self.guidance_scale > 1
         return self.guidance_scale.max() > 1
 
     @property
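
The one-line change above fixes a truthiness bug: the property is consumed as a boolean, but the old code returned the raw guidance value, so classifier-free guidance was effectively always on for any non-zero scale. A standalone illustration (not pipeline code):

```python
# Minimal illustration of the bug fixed above.
guidance_scale = 1.0  # by convention, CFG should be disabled when guidance_scale <= 1

old_flag = guidance_scale        # truthy -> guidance branch wrongly taken
new_flag = guidance_scale > 1    # False  -> guidance branch correctly skipped

assert bool(old_flag) is True
assert new_flag is False
```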
@@ -307,6 +330,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
         return self._num_timesteps
 
     @torch.no_grad()
+    @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
         self,
         image: Union[PIL.Image.Image, List[PIL.Image.Image], torch.FloatTensor],
@@ -333,16 +357,16 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
 
         Args:
             image (`PIL.Image.Image` or `List[PIL.Image.Image]` or `torch.FloatTensor`):
-                Image or images to guide image generation. If you provide a tensor, it needs to be compatible with
-                [`CLIPImageProcessor`](https://huggingface.co/lambdalabs/sd-image-variations-diffusers/blob/main/feature_extractor/preprocessor_config.json).
+                Image(s) to guide image generation. If you provide a tensor, the expected value range is between `[0, 1]`.
             height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                 The height in pixels of the generated image.
             width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                 The width in pixels of the generated image.
             num_frames (`int`, *optional*):
-                The number of video frames to generate. Defaults to 14 for `stable-video-diffusion-img2vid` and to 25 for `stable-video-diffusion-img2vid-xt`
+                The number of video frames to generate. Defaults to `self.unet.config.num_frames`
+                (14 for `stable-video-diffusion-img2vid` and to 25 for `stable-video-diffusion-img2vid-xt`).
             num_inference_steps (`int`, *optional*, defaults to 25):
-                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
+                The number of denoising steps. More denoising steps usually lead to a higher quality video at the
                 expense of slower inference. This parameter is modulated by `strength`.
             min_guidance_scale (`float`, *optional*, defaults to 1.0):
                 The minimum guidance scale. Used for the classifier free guidance with first frame.
@@ -352,29 +376,29 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
                 Frames per second. The rate at which the generated images shall be exported to a video after generation.
                 Note that Stable Diffusion Video's UNet was micro-conditioned on fps-1 during training.
             motion_bucket_id (`int`, *optional*, defaults to 127):
-                The motion bucket ID. Used as conditioning for the generation. The higher the number the more motion will be in the video.
+                Used for conditioning the amount of motion for the generation. The higher the number the more motion
+                will be in the video.
             noise_aug_strength (`float`, *optional*, defaults to 0.02):
                 The amount of noise added to the init image, the higher it is the less the video will look like the init image. Increase it for more motion.
             decode_chunk_size (`int`, *optional*):
-                The number of frames to decode at a time. The higher the chunk size, the higher the temporal consistency
-                between frames, but also the higher the memory consumption. By default, the decoder will decode all frames at once
-                for maximal quality. Reduce `decode_chunk_size` to reduce memory usage.
+                The number of frames to decode at a time. Higher chunk size leads to better temporal consistency at
+                the expense of more memory usage. By default, the decoder decodes all frames at once for maximal
+                quality. For lower memory usage, reduce `decode_chunk_size`.
             num_videos_per_prompt (`int`, *optional*, defaults to 1):
-                The number of images to generate per prompt.
+                The number of videos to generate per prompt.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
             latents (`torch.FloatTensor`, *optional*):
-                Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
+                Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
             output_type (`str`, *optional*, defaults to `"pil"`):
-                The output format of the generated image. Choose between `PIL.Image` or `np.array`.
+                The output format of the generated image. Choose between `pil`, `np` or `pt`.
             callback_on_step_end (`Callable`, *optional*):
-                A function that calls at the end of each denoising steps during the inference. The function is called
-                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
-                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
-                `callback_on_step_end_tensor_inputs`.
+                A function that is called at the end of each denoising step during inference. The function is called
+                with the following arguments:
+                `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`.
+                `callback_kwargs` will include a list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
383
407
  Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
384
408
  plain tuple.
385
409
 
410
+ Examples:
411
+
386
412
  Returns:
387
413
  [`~pipelines.stable_diffusion.StableVideoDiffusionPipelineOutput`] or `tuple`:
388
414
  If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableVideoDiffusionPipelineOutput`] is returned,
389
- otherwise a `tuple` is returned where the first element is a list of list with the generated frames.
390
-
391
- Examples:
392
-
393
- ```py
394
- from diffusers import StableVideoDiffusionPipeline
395
- from diffusers.utils import load_image, export_to_video
396
-
397
- pipe = StableVideoDiffusionPipeline.from_pretrained("stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16")
398
- pipe.to("cuda")
399
-
400
- image = load_image("https://lh3.googleusercontent.com/y-iFOHfLTwkuQSUegpwDdgKmOjRSTvPxat63dQLB25xkTs4lhIbRUFeNBWZzYf370g=s1200")
401
- image = image.resize((1024, 576))
402
-
403
- frames = pipe(image, num_frames=25, decode_chunk_size=8).frames[0]
404
- export_to_video(frames, "generated.mp4", fps=7)
405
- ```
415
+ otherwise a `tuple` of (`List[List[PIL.Image.Image]]` or `np.ndarray` or `torch.FloatTensor`) is returned.
406
416
  """
407
417
  # 0. Default height and width to unet
408
418
  height = height or self.unet.config.sample_size * self.vae_scale_factor
@@ -430,8 +440,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
         # 3. Encode input image
         image_embeddings = self._encode_image(image, device, num_videos_per_prompt, self.do_classifier_free_guidance)
 
-        # NOTE: Stable Diffusion Video was conditioned on fps - 1, which
-        # is why it is reduced here.
+        # NOTE: Stable Video Diffusion was conditioned on fps - 1, which is why it is reduced here.
         # See: https://github.com/Stability-AI/generative-models/blob/ed0997173f98eaf8f4edf7ba5fe8f15c6b877fd3/scripts/sampling/simple_video_sample.py#L188
         fps = fps - 1
@@ -472,11 +481,11 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
         )
         added_time_ids = added_time_ids.to(device)
 
-        # 4. Prepare timesteps
+        # 6. Prepare timesteps
         self.scheduler.set_timesteps(num_inference_steps, device=device)
         timesteps = self.scheduler.timesteps
 
-        # 5. Prepare latent variables
+        # 7. Prepare latent variables
         num_channels_latents = self.unet.config.in_channels
         latents = self.prepare_latents(
             batch_size * num_videos_per_prompt,
@@ -490,7 +499,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
             latents,
         )
 
-        # 7. Prepare guidance scale
+        # 8. Prepare guidance scale
         guidance_scale = torch.linspace(min_guidance_scale, max_guidance_scale, num_frames).unsqueeze(0)
         guidance_scale = guidance_scale.to(device, latents.dtype)
         guidance_scale = guidance_scale.repeat(batch_size * num_videos_per_prompt, 1)
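
The renumbered "Prepare guidance scale" step builds a per-frame guidance ramp rather than a single scalar: the first frame gets `min_guidance_scale` and the last frame gets `max_guidance_scale`. A standalone snippet showing the tensor this produces:

```python
import torch

# Standalone illustration of the per-frame guidance ramp prepared in the step above.
min_guidance_scale, max_guidance_scale, num_frames = 1.0, 3.0, 25
batch_size, num_videos_per_prompt = 1, 1

guidance_scale = torch.linspace(min_guidance_scale, max_guidance_scale, num_frames).unsqueeze(0)
guidance_scale = guidance_scale.repeat(batch_size * num_videos_per_prompt, 1)

print(guidance_scale.shape)  # torch.Size([1, 25])
print(guidance_scale[0, 0].item(), guidance_scale[0, -1].item())  # 1.0 3.0
```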
@@ -498,7 +507,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
 
         self._guidance_scale = guidance_scale
 
-        # 8. Denoising loop
+        # 9. Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         self._num_timesteps = len(timesteps)
         with self.progress_bar(total=num_inference_steps) as progress_bar:
@@ -507,7 +516,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
                 latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
 
-                # Concatenate image_latents over channels dimention
+                # Concatenate image_latents over channels dimension
                 latent_model_input = torch.cat([latent_model_input, image_latents], dim=2)
 
                 # predict the noise residual
diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py

@@ -1,4 +1,4 @@
-# Copyright 2023 TencentARC and The HuggingFace Team. All rights reserved.
+# Copyright 2024 TencentARC and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -37,7 +37,7 @@ from ...utils import (
     unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
@@ -163,7 +163,7 @@ def retrieve_timesteps(
     return timesteps, num_inference_steps
 
 
-class StableDiffusionAdapterPipeline(DiffusionPipeline):
+class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin):
     r"""
     Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
     https://arxiv.org/abs/2302.08453
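
The new `StableDiffusionMixin` base is what allows the copy-pasted `enable_vae_slicing`/`disable_vae_slicing` and `enable_freeu`/`disable_freeu` helpers to be deleted from this file in the hunks below; they are now inherited rather than redefined per pipeline. A hedged usage sketch follows; the model IDs and FreeU values are assumptions, not values taken from this diff.

```python
import torch
from diffusers import StableDiffusionAdapterPipeline, T2IAdapter

# Sketch only: checkpoint IDs are illustrative.
adapter = T2IAdapter.from_pretrained("TencentARC/t2iadapter_canny_sd15v2", torch_dtype=torch.float16)
pipe = StableDiffusionAdapterPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", adapter=adapter, torch_dtype=torch.float16
).to("cuda")

pipe.enable_vae_slicing()  # still available, now inherited from StableDiffusionMixin
pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.5, b2=1.6)  # values commonly suggested for SD 1.5; verify against the FreeU repo
```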
@@ -248,22 +248,6 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)
 
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
-    def enable_vae_slicing(self):
-        r"""
-        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
-        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
-        """
-        self.vae.enable_slicing()
-
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
-    def disable_vae_slicing(self):
-        r"""
-        Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
-        computing decoding in one step.
-        """
-        self.vae.disable_slicing()
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
358
342
  batch_size = prompt_embeds.shape[0]
359
343
 
360
344
  if prompt_embeds is None:
361
- # textual inversion: procecss multi-vector tokens if necessary
345
+ # textual inversion: process multi-vector tokens if necessary
362
346
  if isinstance(self, TextualInversionLoaderMixin):
363
347
  prompt = self.maybe_convert_prompt(prompt, self.tokenizer)
364
348
 
@@ -440,7 +424,7 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
             else:
                 uncond_tokens = negative_prompt
 
-            # textual inversion: procecss multi-vector tokens if necessary
+            # textual inversion: process multi-vector tokens if necessary
             if isinstance(self, TextualInversionLoaderMixin):
                 uncond_tokens = self.maybe_convert_prompt(uncond_tokens, self.tokenizer)
 
@@ -628,34 +612,6 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
 
         return height, width
 
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
-    def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
-        r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
-        The suffixes after the scaling factors represent the stages where they are being applied.
-
-        Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
-        that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
-        Args:
-            s1 (`float`):
-                Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
-                mitigate "oversmoothing effect" in the enhanced denoising process.
-            s2 (`float`):
-                Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
-                mitigate "oversmoothing effect" in the enhanced denoising process.
-            b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
-            b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
-        """
-        if not hasattr(self, "unet"):
-            raise ValueError("The pipeline must have `unet` for using FreeU.")
-        self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
-    def disable_freeu(self):
-        """Disables the FreeU mechanism if enabled."""
-        self.unet.disable_freeu()
-
     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
     def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
         """