diffusers 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. diffusers/__init__.py +26 -1
  2. diffusers/callbacks.py +156 -0
  3. diffusers/commands/env.py +110 -6
  4. diffusers/configuration_utils.py +33 -11
  5. diffusers/dependency_versions_table.py +2 -1
  6. diffusers/image_processor.py +158 -45
  7. diffusers/loaders/__init__.py +2 -5
  8. diffusers/loaders/autoencoder.py +4 -4
  9. diffusers/loaders/controlnet.py +4 -4
  10. diffusers/loaders/ip_adapter.py +80 -22
  11. diffusers/loaders/lora.py +134 -20
  12. diffusers/loaders/lora_conversion_utils.py +46 -43
  13. diffusers/loaders/peft.py +4 -3
  14. diffusers/loaders/single_file.py +401 -170
  15. diffusers/loaders/single_file_model.py +290 -0
  16. diffusers/loaders/single_file_utils.py +616 -672
  17. diffusers/loaders/textual_inversion.py +41 -20
  18. diffusers/loaders/unet.py +168 -115
  19. diffusers/loaders/unet_loader_utils.py +163 -0
  20. diffusers/models/__init__.py +8 -0
  21. diffusers/models/activations.py +23 -3
  22. diffusers/models/attention.py +10 -11
  23. diffusers/models/attention_processor.py +475 -148
  24. diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
  25. diffusers/models/autoencoders/autoencoder_kl.py +18 -19
  26. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
  27. diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
  28. diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
  29. diffusers/models/autoencoders/vae.py +23 -24
  30. diffusers/models/controlnet.py +12 -9
  31. diffusers/models/controlnet_flax.py +4 -4
  32. diffusers/models/controlnet_xs.py +1915 -0
  33. diffusers/models/downsampling.py +17 -18
  34. diffusers/models/embeddings.py +363 -32
  35. diffusers/models/model_loading_utils.py +177 -0
  36. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  37. diffusers/models/modeling_flax_utils.py +4 -4
  38. diffusers/models/modeling_outputs.py +14 -0
  39. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  40. diffusers/models/modeling_utils.py +175 -99
  41. diffusers/models/normalization.py +2 -1
  42. diffusers/models/resnet.py +18 -23
  43. diffusers/models/transformer_temporal.py +3 -3
  44. diffusers/models/transformers/__init__.py +3 -0
  45. diffusers/models/transformers/dit_transformer_2d.py +240 -0
  46. diffusers/models/transformers/dual_transformer_2d.py +4 -4
  47. diffusers/models/transformers/hunyuan_transformer_2d.py +427 -0
  48. diffusers/models/transformers/pixart_transformer_2d.py +336 -0
  49. diffusers/models/transformers/prior_transformer.py +7 -7
  50. diffusers/models/transformers/t5_film_transformer.py +17 -19
  51. diffusers/models/transformers/transformer_2d.py +292 -184
  52. diffusers/models/transformers/transformer_temporal.py +10 -10
  53. diffusers/models/unets/unet_1d.py +5 -5
  54. diffusers/models/unets/unet_1d_blocks.py +29 -29
  55. diffusers/models/unets/unet_2d.py +6 -6
  56. diffusers/models/unets/unet_2d_blocks.py +137 -128
  57. diffusers/models/unets/unet_2d_condition.py +19 -15
  58. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  59. diffusers/models/unets/unet_3d_blocks.py +79 -77
  60. diffusers/models/unets/unet_3d_condition.py +13 -9
  61. diffusers/models/unets/unet_i2vgen_xl.py +14 -13
  62. diffusers/models/unets/unet_kandinsky3.py +1 -1
  63. diffusers/models/unets/unet_motion_model.py +114 -14
  64. diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
  65. diffusers/models/unets/unet_stable_cascade.py +16 -13
  66. diffusers/models/upsampling.py +17 -20
  67. diffusers/models/vq_model.py +16 -15
  68. diffusers/pipelines/__init__.py +27 -3
  69. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  70. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  71. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  72. diffusers/pipelines/animatediff/__init__.py +2 -0
  73. diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
  74. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
  75. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
  76. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  77. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  78. diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
  79. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
  80. diffusers/pipelines/auto_pipeline.py +21 -17
  81. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  82. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
  83. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  84. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  85. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
  86. diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
  87. diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
  88. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  89. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
  90. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
  91. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
  92. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
  93. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
  94. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  95. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
  96. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
  97. diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
  98. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
  99. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
  100. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
  101. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
  102. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
  103. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  104. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
  105. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
  106. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  107. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  108. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
  109. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -18
  110. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
  111. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
  112. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
  113. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
  114. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  115. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  116. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  117. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  118. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  119. diffusers/pipelines/dit/pipeline_dit.py +7 -4
  120. diffusers/pipelines/free_init_utils.py +39 -38
  121. diffusers/pipelines/hunyuandit/__init__.py +48 -0
  122. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +881 -0
  123. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  124. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  125. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
  126. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  127. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  128. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  129. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  130. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
  131. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  132. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  133. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  134. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  135. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  136. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  137. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  138. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
  139. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
  140. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
  141. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
  142. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
  143. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
  144. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
  145. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  146. diffusers/pipelines/marigold/__init__.py +50 -0
  147. diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
  148. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  149. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  150. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  151. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  152. diffusers/pipelines/pia/pipeline_pia.py +39 -125
  153. diffusers/pipelines/pipeline_flax_utils.py +4 -4
  154. diffusers/pipelines/pipeline_loading_utils.py +269 -23
  155. diffusers/pipelines/pipeline_utils.py +266 -37
  156. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  157. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +69 -79
  158. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
  159. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
  160. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  161. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  162. diffusers/pipelines/shap_e/renderer.py +1 -1
  163. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +18 -18
  164. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  165. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
  166. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  167. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
  168. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  169. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  170. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
  171. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
  172. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  173. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
  174. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
  175. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
  176. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
  177. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
  178. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
  179. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
  180. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  181. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
  182. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -39
  183. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
  184. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
  185. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
  186. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
  187. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
  188. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
  189. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  190. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  191. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
  192. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
  193. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
  194. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
  195. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
  196. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  197. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  198. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
  199. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
  200. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  201. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
  202. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
  203. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
  204. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
  205. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  206. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  207. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  208. diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
  209. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
  210. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  211. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  212. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
  213. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  214. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  215. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
  216. diffusers/schedulers/__init__.py +2 -2
  217. diffusers/schedulers/deprecated/__init__.py +1 -1
  218. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  219. diffusers/schedulers/scheduling_amused.py +5 -5
  220. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  221. diffusers/schedulers/scheduling_consistency_models.py +20 -26
  222. diffusers/schedulers/scheduling_ddim.py +22 -24
  223. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  224. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  225. diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
  226. diffusers/schedulers/scheduling_ddpm.py +20 -22
  227. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  228. diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
  229. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  230. diffusers/schedulers/scheduling_deis_multistep.py +42 -42
  231. diffusers/schedulers/scheduling_dpmsolver_multistep.py +103 -77
  232. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  233. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
  234. diffusers/schedulers/scheduling_dpmsolver_sde.py +23 -23
  235. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +86 -65
  236. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +75 -54
  237. diffusers/schedulers/scheduling_edm_euler.py +50 -31
  238. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +23 -29
  239. diffusers/schedulers/scheduling_euler_discrete.py +160 -68
  240. diffusers/schedulers/scheduling_heun_discrete.py +57 -39
  241. diffusers/schedulers/scheduling_ipndm.py +8 -8
  242. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +19 -19
  243. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +19 -19
  244. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  245. diffusers/schedulers/scheduling_lcm.py +21 -23
  246. diffusers/schedulers/scheduling_lms_discrete.py +24 -26
  247. diffusers/schedulers/scheduling_pndm.py +20 -20
  248. diffusers/schedulers/scheduling_repaint.py +20 -20
  249. diffusers/schedulers/scheduling_sasolver.py +55 -54
  250. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  251. diffusers/schedulers/scheduling_tcd.py +39 -30
  252. diffusers/schedulers/scheduling_unclip.py +15 -15
  253. diffusers/schedulers/scheduling_unipc_multistep.py +111 -41
  254. diffusers/schedulers/scheduling_utils.py +14 -5
  255. diffusers/schedulers/scheduling_utils_flax.py +3 -3
  256. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  257. diffusers/training_utils.py +56 -1
  258. diffusers/utils/__init__.py +7 -0
  259. diffusers/utils/doc_utils.py +1 -0
  260. diffusers/utils/dummy_pt_objects.py +75 -0
  261. diffusers/utils/dummy_torch_and_transformers_objects.py +105 -0
  262. diffusers/utils/dynamic_modules_utils.py +24 -11
  263. diffusers/utils/hub_utils.py +3 -2
  264. diffusers/utils/import_utils.py +91 -0
  265. diffusers/utils/loading_utils.py +2 -2
  266. diffusers/utils/logging.py +1 -1
  267. diffusers/utils/peft_utils.py +32 -5
  268. diffusers/utils/state_dict_utils.py +11 -2
  269. diffusers/utils/testing_utils.py +71 -6
  270. diffusers/utils/torch_utils.py +1 -0
  271. diffusers/video_processor.py +113 -0
  272. {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/METADATA +7 -7
  273. diffusers-0.28.1.dist-info/RECORD +419 -0
  274. diffusers-0.27.2.dist-info/RECORD +0 -399
  275. {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/LICENSE +0 -0
  276. {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/WHEEL +0 -0
  277. {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/entry_points.txt +0 -0
  278. {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/top_level.txt +0 -0
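Selected hunks from the diff follow, covering files 150–153 in the list above (MusicLDM, Paint-by-Example, PIA, and the Flax pipeline utilities).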
diffusers/pipelines/musicldm/pipeline_musicldm.py

@@ -120,8 +120,8 @@ class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_waveforms_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
     ):
         r"""
         Encodes the prompt into text encoder hidden states.

@@ -139,10 +139,10 @@ class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
                 The prompt or prompts not to guide the audio generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.

@@ -363,8 +363,8 @@ class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
         shape = (
             batch_size,
             num_channels_latents,
-            height // self.vae_scale_factor,
-            self.vocoder.config.model_in_dim // self.vae_scale_factor,
+            int(height) // self.vae_scale_factor,
+            int(self.vocoder.config.model_in_dim) // self.vae_scale_factor,
         )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(

@@ -427,11 +427,11 @@ class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_waveforms_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: Optional[int] = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         output_type: Optional[str] = "np",

@@ -465,21 +465,21 @@ class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~pipelines.AudioPipelineOutput`] instead of a plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.
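The `torch.FloatTensor` → `torch.Tensor` annotation change shown above is applied across nearly every pipeline in this release: the old annotations implied fp32-only inputs, while the pipelines have long accepted half-precision tensors as well. A minimal sketch of a step callback written against the new annotation (the checkpoint id is the one used in the MusicLDM docs and is an assumption here):

import torch
from diffusers import MusicLDMPipeline

pipe = MusicLDMPipeline.from_pretrained("ucsd-reach/musicldm", torch_dtype=torch.float16).to("cuda")

def log_step(step: int, timestep: int, latents: torch.Tensor) -> None:
    # The 0.27 annotation was torch.FloatTensor, which implied fp32;
    # these latents are fp16 and were always valid at runtime.
    print(f"step={step} t={timestep} dtype={latents.dtype} shape={tuple(latents.shape)}")

audio = pipe(
    "techno music with a strong beat",
    num_inference_steps=20,
    audio_length_in_s=5.0,
    callback=log_step,
    callback_steps=5,
).audios[0]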
diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py

@@ -266,7 +266,7 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
             and not isinstance(image, list)
         ):
             raise ValueError(
-                "`image` has to be of type `torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
+                "`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
                 f" {type(image)}"
             )

@@ -283,7 +283,12 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
     def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -388,9 +393,9 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
     @torch.no_grad()
     def __call__(
         self,
-        example_image: Union[torch.FloatTensor, PIL.Image.Image],
-        image: Union[torch.FloatTensor, PIL.Image.Image],
-        mask_image: Union[torch.FloatTensor, PIL.Image.Image],
+        example_image: Union[torch.Tensor, PIL.Image.Image],
+        image: Union[torch.Tensor, PIL.Image.Image],
+        mask_image: Union[torch.Tensor, PIL.Image.Image],
         height: Optional[int] = None,
         width: Optional[int] = None,
         num_inference_steps: int = 50,

@@ -399,22 +404,22 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
     ):
         r"""
         The call function to the pipeline for generation.

         Args:
-            example_image (`torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
+            example_image (`torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
                 An example image to guide image generation.
-            image (`torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
+            image (`torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
                 `Image` or tensor representing an image batch to be inpainted (parts of the image are masked out with
                 `mask_image` and repainted according to `prompt`).
-            mask_image (`torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
+            mask_image (`torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]`):
                 `Image` or tensor representing an image batch to mask `image`. White pixels in the mask are repainted,
                 while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a single channel
                 (luminance) before use. If it's a tensor, it should contain one color channel (L) instead of 3, so the

@@ -440,7 +445,7 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.

@@ -451,7 +456,7 @@ class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
-                following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function is called. If not specified, the callback is called at
                 every step.
diffusers/pipelines/pia/pipeline_pia.py

@@ -13,17 +13,15 @@
 # limitations under the License.

 import inspect
-import math
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Union

 import numpy as np
 import PIL
 import torch
-import torch.fft as fft
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

-from ...image_processor import PipelineImageInput, VaeImageProcessor
+from ...image_processor import PipelineImageInput
 from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
 from ...models.lora import adjust_lora_scale_text_encoder

@@ -45,6 +43,7 @@ from ...utils import (
     unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
+from ...video_processor import VideoProcessor
 from ..free_init_utils import FreeInitMixin
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin

@@ -61,6 +60,7 @@ EXAMPLE_DOC_STRING = """
         ...     PIAPipeline,
         ... )
         >>> from diffusers.utils import export_to_gif, load_image
+
         >>> adapter = MotionAdapter.from_pretrained("../checkpoints/pia-diffusers")
         >>> pipe = PIAPipeline.from_pretrained("SG161222/Realistic_Vision_V6.0_B1_noVAE", motion_adapter=adapter)
         >>> pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
@@ -90,28 +90,6 @@ RANGE_LIST = [
 ]


-# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.tensor2vid
-def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: str = "np"):
-    batch_size, channels, num_frames, height, width = video.shape
-    outputs = []
-    for batch_idx in range(batch_size):
-        batch_vid = video[batch_idx].permute(1, 0, 2, 3)
-        batch_output = processor.postprocess(batch_vid, output_type)
-
-        outputs.append(batch_output)
-
-    if output_type == "np":
-        outputs = np.stack(outputs)
-
-    elif output_type == "pt":
-        outputs = torch.stack(outputs)
-
-    elif not output_type == "pil":
-        raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil']")
-
-    return outputs
-
-
 def prepare_mask_coef_by_statistics(num_frames: int, cond_frame: int, motion_scale: int):
     assert num_frames > 0, "video_length should be greater than 0"

@@ -130,71 +108,6 @@ def prepare_mask_coef_by_statistics(num_frames: int, cond_frame: int, motion_scale: int):
     return coef


-def _get_freeinit_freq_filter(
-    shape: Tuple[int, ...],
-    device: Union[str, torch.dtype],
-    filter_type: str,
-    order: float,
-    spatial_stop_frequency: float,
-    temporal_stop_frequency: float,
-) -> torch.Tensor:
-    r"""Returns the FreeInit filter based on filter type and other input conditions."""
-
-    time, height, width = shape[-3], shape[-2], shape[-1]
-    mask = torch.zeros(shape)
-
-    if spatial_stop_frequency == 0 or temporal_stop_frequency == 0:
-        return mask
-
-    if filter_type == "butterworth":
-
-        def retrieve_mask(x):
-            return 1 / (1 + (x / spatial_stop_frequency**2) ** order)
-    elif filter_type == "gaussian":
-
-        def retrieve_mask(x):
-            return math.exp(-1 / (2 * spatial_stop_frequency**2) * x)
-    elif filter_type == "ideal":
-
-        def retrieve_mask(x):
-            return 1 if x <= spatial_stop_frequency * 2 else 0
-    else:
-        raise NotImplementedError("`filter_type` must be one of gaussian, butterworth or ideal")
-
-    for t in range(time):
-        for h in range(height):
-            for w in range(width):
-                d_square = (
-                    ((spatial_stop_frequency / temporal_stop_frequency) * (2 * t / time - 1)) ** 2
-                    + (2 * h / height - 1) ** 2
-                    + (2 * w / width - 1) ** 2
-                )
-                mask[..., t, h, w] = retrieve_mask(d_square)
-
-    return mask.to(device)
-
-
-def _freq_mix_3d(x: torch.Tensor, noise: torch.Tensor, LPF: torch.Tensor) -> torch.Tensor:
-    r"""Noise reinitialization."""
-    # FFT
-    x_freq = fft.fftn(x, dim=(-3, -2, -1))
-    x_freq = fft.fftshift(x_freq, dim=(-3, -2, -1))
-    noise_freq = fft.fftn(noise, dim=(-3, -2, -1))
-    noise_freq = fft.fftshift(noise_freq, dim=(-3, -2, -1))
-
-    # frequency mix
-    HPF = 1 - LPF
-    x_freq_low = x_freq * LPF
-    noise_freq_high = noise_freq * HPF
-    x_freq_mixed = x_freq_low + noise_freq_high  # mix in freq domain
-
-    # IFFT
-    x_freq_mixed = fft.ifftshift(x_freq_mixed, dim=(-3, -2, -1))
-    x_mixed = fft.ifftn(x_freq_mixed, dim=(-3, -2, -1)).real
-
-    return x_mixed
-
-
 @dataclass
 class PIAPipelineOutput(BaseOutput):
     r"""
@@ -202,9 +115,9 @@ class PIAPipelineOutput(BaseOutput):

     Args:
         frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
-            Nested list of length `batch_size` with denoised PIL image sequences of length `num_frames`,
-            NumPy array of shape `(batch_size, num_frames, channels, height, width,
-            Torch tensor of shape `(batch_size, num_frames, channels, height, width)`.
+            Nested list of length `batch_size` with denoised PIL image sequences of length `num_frames`, NumPy array of
+            shape `(batch_size, num_frames, channels, height, width, Torch tensor of shape `(batch_size, num_frames,
+            channels, height, width)`.
     """

     frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]

@@ -284,7 +197,7 @@ class PIAPipeline(
             image_encoder=image_encoder,
         )
         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
-        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+        self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt with num_images_per_prompt -> num_videos_per_prompt
     def encode_prompt(

@@ -294,8 +207,8 @@ class PIAPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):

@@ -315,10 +228,10 @@ class PIAPipeline(
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.

@@ -687,7 +600,7 @@ class PIAPipeline(
         )
         _, _, _, scaled_height, scaled_width = shape

-        image = self.image_processor.preprocess(image)
+        image = self.video_processor.preprocess(image)
         image = image.to(device, dtype)

         if isinstance(generator, list):

@@ -767,11 +680,11 @@ class PIAPipeline(
         num_videos_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
         motion_scale: int = 0,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,

@@ -788,7 +701,8 @@ class PIAPipeline(
                 The input image to be used for video generation.
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
-            strength (`float`, *optional*, defaults to 1.0): Indicates extent to transform the reference `image`. Must be between 0 and 1.
+            strength (`float`, *optional*, defaults to 1.0):
+                Indicates extent to transform the reference `image`. Must be between 0 and 1.
             height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                 The height in pixels of the generated video.
             width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):

@@ -811,33 +725,31 @@ class PIAPipeline(
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`. Latents should be of shape
                 `(batch_size, num_channel, num_frames, height, width)`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             ip_adapter_image: (`PipelineImageInput`, *optional*):
                 Optional image input to work with IP Adapters.
-            ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
-                Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
-                if `do_classifier_free_guidance` is set to `True`.
-                If not provided, embeddings are computed from the `ip_adapter_image` input argument.
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
+                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
             motion_scale: (`int`, *optional*, defaults to 0):
-                Parameter that controls the amount and type of motion that is added to the image. Increasing the value increases the amount of motion, while specific
-                ranges of values control the type of motion that is added. Must be between 0 and 8.
-                Set between 0-2 to only increase the amount of motion.
-                Set between 3-5 to create looping motion.
-                Set between 6-8 to perform motion with image style transfer.
+                Parameter that controls the amount and type of motion that is added to the image. Increasing the value
+                increases the amount of motion, while specific ranges of values control the type of motion that is
+                added. Must be between 0 and 8. Set between 0-2 to only increase the amount of motion. Set between 3-5
+                to create looping motion. Set between 6-8 to perform motion with image style transfer.
             output_type (`str`, *optional*, defaults to `"pil"`):
-                The output format of the generated video. Choose between `torch.FloatTensor`, `PIL.Image` or
-                `np.array`.
+                The output format of the generated video. Choose between `torch.Tensor`, `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] instead
                 of a plain tuple.
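Continuing the sketch above: the reflowed `motion_scale` documentation maps to a single integer at call time, where the value band picks the behaviour (0-2 more motion, 3-5 looping, 6-8 motion with image style transfer). `init_image` below is a gray placeholder rather than a real conditioning frame:

import PIL.Image

init_image = PIL.Image.new("RGB", (512, 512), (127, 127, 127))

output = pipe(
    prompt="a field of flowers swaying in the wind",
    image=init_image,
    motion_scale=4,  # 3-5 band: looping motion
    num_inference_steps=25,
)
frames = output.frames[0]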
@@ -855,14 +767,14 @@ class PIAPipeline(
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.

         Examples:

         Returns:
             [`~pipelines.pia.pipeline_pia.PIAPipelineOutput`] or `tuple`:
-                If `return_dict` is `True`, [`~pipelines.pia.pipeline_pia.PIAPipelineOutput`] is
-                returned, otherwise a `tuple` is returned where the first element is a list with the generated frames.
+                If `return_dict` is `True`, [`~pipelines.pia.pipeline_pia.PIAPipelineOutput`] is returned, otherwise a
+                `tuple` is returned where the first element is a list with the generated frames.
         """
         # 0. Default height and width to unet
         height = height or self.unet.config.sample_size * self.vae_scale_factor
@@ -979,8 +891,10 @@ class PIAPipeline(
                     latents, free_init_iter, num_inference_steps, device, latents.dtype, generator
                 )

+            self._num_timesteps = len(timesteps)
             num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
-            with self.progress_bar(total=num_inference_steps) as progress_bar:
+
+            with self.progress_bar(total=self._num_timesteps) as progress_bar:
                 for i, t in enumerate(timesteps):
                     # expand the latents if we are doing classifier free guidance
                     latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
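The progress-bar change above follows from the new `strength` argument: with `strength < 1.0` PIA starts denoising partway through the schedule, so `len(timesteps)` can be smaller than `num_inference_steps`. A sketch of the arithmetic, assuming the usual diffusers `get_timesteps` convention:

num_inference_steps = 50
strength = 0.6

init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
t_start = max(num_inference_steps - init_timestep, 0)

print(num_inference_steps - t_start)  # 30 steps actually execute, not 50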
@@ -1023,7 +937,7 @@ class PIAPipeline(
             video = latents
         else:
             video_tensor = self.decode_latents(latents)
-            video = tensor2vid(video_tensor, self.image_processor, output_type=output_type)
+            video = self.video_processor.postprocess_video(video=video_tensor, output_type=output_type)

         # 10. Offload all models
         self.maybe_free_model_hooks()
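This is the replacement path for the deleted `tensor2vid` helper: post-processing is delegated to the new shared `diffusers/video_processor.py` module (+113 lines in the file list). A sketch, with the tensor shape assumed from what `decode_latents` returns:

import torch
from diffusers.video_processor import VideoProcessor

video_processor = VideoProcessor(do_resize=False, vae_scale_factor=8)

# (batch, channels, frames, height, width), values in [0, 1]
video_tensor = torch.rand(1, 3, 16, 64, 64)

frames = video_processor.postprocess_video(video=video_tensor, output_type="np")
print(frames.shape)  # expected: (1, 16, 64, 64, 3) for "np" output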
diffusers/pipelines/pipeline_flax_utils.py

@@ -254,9 +254,9 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
-            resume_download (`bool`, *optional*, defaults to `False`):
-                Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-                incompletely downloaded files are deleted.
+            resume_download:
+                Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+                of Diffusers.
             proxies (`Dict[str, str]`, *optional*):
                 A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.

@@ -316,7 +316,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
        ```
        """
        cache_dir = kwargs.pop("cache_dir", None)
-       resume_download = kwargs.pop("resume_download", False)
+       resume_download = kwargs.pop("resume_download", None)
        proxies = kwargs.pop("proxies", None)
        local_files_only = kwargs.pop("local_files_only", False)
        token = kwargs.pop("token", None)
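The `resume_download` deprecation mirrors huggingface_hub, where interrupted downloads now always resume when possible; the flag is still accepted but ignored until its removal in v1. A before/after sketch (the model id is illustrative):

from diffusers import DiffusionPipeline

# 0.27 style -- the flag was honored:
# pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", resume_download=True)

# 0.28 style -- omit it; passing it only triggers the deprecation path:
pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")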