diffusers 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. diffusers/__init__.py +26 -1
  2. diffusers/callbacks.py +156 -0
  3. diffusers/commands/env.py +110 -6
  4. diffusers/configuration_utils.py +33 -11
  5. diffusers/dependency_versions_table.py +2 -1
  6. diffusers/image_processor.py +158 -45
  7. diffusers/loaders/__init__.py +2 -5
  8. diffusers/loaders/autoencoder.py +4 -4
  9. diffusers/loaders/controlnet.py +4 -4
  10. diffusers/loaders/ip_adapter.py +80 -22
  11. diffusers/loaders/lora.py +134 -20
  12. diffusers/loaders/lora_conversion_utils.py +46 -43
  13. diffusers/loaders/peft.py +4 -3
  14. diffusers/loaders/single_file.py +401 -170
  15. diffusers/loaders/single_file_model.py +290 -0
  16. diffusers/loaders/single_file_utils.py +616 -672
  17. diffusers/loaders/textual_inversion.py +41 -20
  18. diffusers/loaders/unet.py +168 -115
  19. diffusers/loaders/unet_loader_utils.py +163 -0
  20. diffusers/models/__init__.py +8 -0
  21. diffusers/models/activations.py +23 -3
  22. diffusers/models/attention.py +10 -11
  23. diffusers/models/attention_processor.py +475 -148
  24. diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
  25. diffusers/models/autoencoders/autoencoder_kl.py +18 -19
  26. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
  27. diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
  28. diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
  29. diffusers/models/autoencoders/vae.py +23 -24
  30. diffusers/models/controlnet.py +12 -9
  31. diffusers/models/controlnet_flax.py +4 -4
  32. diffusers/models/controlnet_xs.py +1915 -0
  33. diffusers/models/downsampling.py +17 -18
  34. diffusers/models/embeddings.py +363 -32
  35. diffusers/models/model_loading_utils.py +177 -0
  36. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  37. diffusers/models/modeling_flax_utils.py +4 -4
  38. diffusers/models/modeling_outputs.py +14 -0
  39. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  40. diffusers/models/modeling_utils.py +175 -99
  41. diffusers/models/normalization.py +2 -1
  42. diffusers/models/resnet.py +18 -23
  43. diffusers/models/transformer_temporal.py +3 -3
  44. diffusers/models/transformers/__init__.py +3 -0
  45. diffusers/models/transformers/dit_transformer_2d.py +240 -0
  46. diffusers/models/transformers/dual_transformer_2d.py +4 -4
  47. diffusers/models/transformers/hunyuan_transformer_2d.py +427 -0
  48. diffusers/models/transformers/pixart_transformer_2d.py +336 -0
  49. diffusers/models/transformers/prior_transformer.py +7 -7
  50. diffusers/models/transformers/t5_film_transformer.py +17 -19
  51. diffusers/models/transformers/transformer_2d.py +292 -184
  52. diffusers/models/transformers/transformer_temporal.py +10 -10
  53. diffusers/models/unets/unet_1d.py +5 -5
  54. diffusers/models/unets/unet_1d_blocks.py +29 -29
  55. diffusers/models/unets/unet_2d.py +6 -6
  56. diffusers/models/unets/unet_2d_blocks.py +137 -128
  57. diffusers/models/unets/unet_2d_condition.py +19 -15
  58. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  59. diffusers/models/unets/unet_3d_blocks.py +79 -77
  60. diffusers/models/unets/unet_3d_condition.py +13 -9
  61. diffusers/models/unets/unet_i2vgen_xl.py +14 -13
  62. diffusers/models/unets/unet_kandinsky3.py +1 -1
  63. diffusers/models/unets/unet_motion_model.py +114 -14
  64. diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
  65. diffusers/models/unets/unet_stable_cascade.py +16 -13
  66. diffusers/models/upsampling.py +17 -20
  67. diffusers/models/vq_model.py +16 -15
  68. diffusers/pipelines/__init__.py +27 -3
  69. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  70. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  71. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  72. diffusers/pipelines/animatediff/__init__.py +2 -0
  73. diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
  74. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
  75. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
  76. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  77. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  78. diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
  79. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
  80. diffusers/pipelines/auto_pipeline.py +21 -17
  81. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  82. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
  83. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  84. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  85. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
  86. diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
  87. diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
  88. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  89. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
  90. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
  91. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
  92. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
  93. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
  94. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  95. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
  96. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
  97. diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
  98. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
  99. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
  100. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
  101. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
  102. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
  103. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  104. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
  105. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
  106. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  107. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  108. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
  109. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -18
  110. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
  111. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
  112. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
  113. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
  114. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  115. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  116. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  117. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  118. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  119. diffusers/pipelines/dit/pipeline_dit.py +7 -4
  120. diffusers/pipelines/free_init_utils.py +39 -38
  121. diffusers/pipelines/hunyuandit/__init__.py +48 -0
  122. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +881 -0
  123. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  124. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  125. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
  126. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  127. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  128. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  129. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  130. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
  131. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  132. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  133. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  134. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  135. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  136. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  137. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  138. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
  139. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
  140. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
  141. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
  142. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
  143. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
  144. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
  145. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  146. diffusers/pipelines/marigold/__init__.py +50 -0
  147. diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
  148. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  149. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  150. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  151. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  152. diffusers/pipelines/pia/pipeline_pia.py +39 -125
  153. diffusers/pipelines/pipeline_flax_utils.py +4 -4
  154. diffusers/pipelines/pipeline_loading_utils.py +269 -23
  155. diffusers/pipelines/pipeline_utils.py +266 -37
  156. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  157. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +69 -79
  158. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
  159. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
  160. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  161. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  162. diffusers/pipelines/shap_e/renderer.py +1 -1
  163. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +18 -18
  164. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  165. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
  166. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  167. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
  168. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  169. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  170. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
  171. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
  172. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  173. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
  174. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
  175. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
  176. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
  177. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
  178. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
  179. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
  180. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  181. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
  182. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -39
  183. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
  184. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
  185. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
  186. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
  187. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
  188. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
  189. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  190. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  191. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
  192. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
  193. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
  194. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
  195. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
  196. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  197. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  198. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
  199. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
  200. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  201. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
  202. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
  203. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
  204. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
  205. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  206. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  207. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  208. diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
  209. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
  210. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  211. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  212. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
  213. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  214. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  215. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
  216. diffusers/schedulers/__init__.py +2 -2
  217. diffusers/schedulers/deprecated/__init__.py +1 -1
  218. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  219. diffusers/schedulers/scheduling_amused.py +5 -5
  220. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  221. diffusers/schedulers/scheduling_consistency_models.py +20 -26
  222. diffusers/schedulers/scheduling_ddim.py +22 -24
  223. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  224. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  225. diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
  226. diffusers/schedulers/scheduling_ddpm.py +20 -22
  227. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  228. diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
  229. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  230. diffusers/schedulers/scheduling_deis_multistep.py +42 -42
  231. diffusers/schedulers/scheduling_dpmsolver_multistep.py +103 -77
  232. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  233. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
  234. diffusers/schedulers/scheduling_dpmsolver_sde.py +23 -23
  235. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +86 -65
  236. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +75 -54
  237. diffusers/schedulers/scheduling_edm_euler.py +50 -31
  238. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +23 -29
  239. diffusers/schedulers/scheduling_euler_discrete.py +160 -68
  240. diffusers/schedulers/scheduling_heun_discrete.py +57 -39
  241. diffusers/schedulers/scheduling_ipndm.py +8 -8
  242. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +19 -19
  243. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +19 -19
  244. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  245. diffusers/schedulers/scheduling_lcm.py +21 -23
  246. diffusers/schedulers/scheduling_lms_discrete.py +24 -26
  247. diffusers/schedulers/scheduling_pndm.py +20 -20
  248. diffusers/schedulers/scheduling_repaint.py +20 -20
  249. diffusers/schedulers/scheduling_sasolver.py +55 -54
  250. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  251. diffusers/schedulers/scheduling_tcd.py +39 -30
  252. diffusers/schedulers/scheduling_unclip.py +15 -15
  253. diffusers/schedulers/scheduling_unipc_multistep.py +111 -41
  254. diffusers/schedulers/scheduling_utils.py +14 -5
  255. diffusers/schedulers/scheduling_utils_flax.py +3 -3
  256. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  257. diffusers/training_utils.py +56 -1
  258. diffusers/utils/__init__.py +7 -0
  259. diffusers/utils/doc_utils.py +1 -0
  260. diffusers/utils/dummy_pt_objects.py +75 -0
  261. diffusers/utils/dummy_torch_and_transformers_objects.py +105 -0
  262. diffusers/utils/dynamic_modules_utils.py +24 -11
  263. diffusers/utils/hub_utils.py +3 -2
  264. diffusers/utils/import_utils.py +91 -0
  265. diffusers/utils/loading_utils.py +2 -2
  266. diffusers/utils/logging.py +1 -1
  267. diffusers/utils/peft_utils.py +32 -5
  268. diffusers/utils/state_dict_utils.py +11 -2
  269. diffusers/utils/testing_utils.py +71 -6
  270. diffusers/utils/torch_utils.py +1 -0
  271. diffusers/video_processor.py +113 -0
  272. {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/METADATA +7 -7
  273. diffusers-0.28.1.dist-info/RECORD +419 -0
  274. diffusers-0.27.2.dist-info/RECORD +0 -399
  275. {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/LICENSE +0 -0
  276. {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/WHEEL +0 -0
  277. {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/entry_points.txt +0 -0
  278. {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/top_level.txt +0 -0
@@ -197,7 +197,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
197
197
  and not isinstance(image, list)
198
198
  ):
199
199
  raise ValueError(
200
- "`image` has to be of type `torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
200
+ "`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
201
201
  f" {type(image)}"
202
202
  )
203
203
 
@@ -214,7 +214,12 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
214
214
 
215
215
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
216
216
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
217
- shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
217
+ shape = (
218
+ batch_size,
219
+ num_channels_latents,
220
+ int(height) // self.vae_scale_factor,
221
+ int(width) // self.vae_scale_factor,
222
+ )
218
223
  if isinstance(generator, list) and len(generator) != batch_size:
219
224
  raise ValueError(
220
225
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -242,10 +247,10 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
242
247
  num_images_per_prompt: Optional[int] = 1,
243
248
  eta: float = 0.0,
244
249
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
245
- latents: Optional[torch.FloatTensor] = None,
250
+ latents: Optional[torch.Tensor] = None,
246
251
  output_type: Optional[str] = "pil",
247
252
  return_dict: bool = True,
248
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
253
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
249
254
  callback_steps: int = 1,
250
255
  **kwargs,
251
256
  ):
@@ -276,7 +281,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
276
281
  generator (`torch.Generator`, *optional*):
277
282
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
278
283
  generation deterministic.
279
- latents (`torch.FloatTensor`, *optional*):
284
+ latents (`torch.Tensor`, *optional*):
280
285
  Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
281
286
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
282
287
  tensor is generated by sampling using the supplied random `generator`.
@@ -287,7 +292,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
287
292
  plain tuple.
288
293
  callback (`Callable`, *optional*):
289
294
  A function that calls every `callback_steps` steps during inference. The function is called with the
290
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
295
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
291
296
  callback_steps (`int`, *optional*, defaults to 1):
292
297
  The frequency at which the `callback` function is called. If not specified, the callback is called at
293
298
  every step.
@@ -300,7 +300,12 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
300
300
 
301
301
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
302
302
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
303
- shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
303
+ shape = (
304
+ batch_size,
305
+ num_channels_latents,
306
+ int(height) // self.vae_scale_factor,
307
+ int(width) // self.vae_scale_factor,
308
+ )
304
309
  if isinstance(generator, list) and len(generator) != batch_size:
305
310
  raise ValueError(
306
311
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -328,10 +333,10 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
328
333
  num_images_per_prompt: Optional[int] = 1,
329
334
  eta: float = 0.0,
330
335
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
331
- latents: Optional[torch.FloatTensor] = None,
336
+ latents: Optional[torch.Tensor] = None,
332
337
  output_type: Optional[str] = "pil",
333
338
  return_dict: bool = True,
334
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
339
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
335
340
  callback_steps: int = 1,
336
341
  **kwargs,
337
342
  ):
@@ -362,7 +367,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
362
367
  generator (`torch.Generator`, *optional*):
363
368
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
364
369
  generation deterministic.
365
- latents (`torch.FloatTensor`, *optional*):
370
+ latents (`torch.Tensor`, *optional*):
366
371
  Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
367
372
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
368
373
  tensor is generated by sampling using the supplied random `generator`.
@@ -373,7 +378,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
373
378
  plain tuple.
374
379
  callback (`Callable`, *optional*):
375
380
  A function that calls every `callback_steps` steps during inference. The function is called with the
376
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
381
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
377
382
  callback_steps (`int`, *optional*, defaults to 1):
378
383
  The frequency at which the `callback` function is called. If not specified, the callback is called at
379
384
  every step.
@@ -169,10 +169,10 @@ class VQDiffusionPipeline(DiffusionPipeline):
169
169
  truncation_rate: float = 1.0,
170
170
  num_images_per_prompt: int = 1,
171
171
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
172
- latents: Optional[torch.FloatTensor] = None,
172
+ latents: Optional[torch.Tensor] = None,
173
173
  output_type: Optional[str] = "pil",
174
174
  return_dict: bool = True,
175
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
175
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
176
176
  callback_steps: int = 1,
177
177
  ) -> Union[ImagePipelineOutput, Tuple]:
178
178
  """
@@ -196,7 +196,7 @@ class VQDiffusionPipeline(DiffusionPipeline):
196
196
  generator (`torch.Generator`, *optional*):
197
197
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
198
198
  generation deterministic.
199
- latents (`torch.FloatTensor` of shape (batch), *optional*):
199
+ latents (`torch.Tensor` of shape (batch), *optional*):
200
200
  Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
201
201
  generation. Must be valid embedding indices.If not provided, a latents tensor will be generated of
202
202
  completely masked latent pixels.
@@ -206,7 +206,7 @@ class VQDiffusionPipeline(DiffusionPipeline):
206
206
  Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
207
207
  callback (`Callable`, *optional*):
208
208
  A function that calls every `callback_steps` steps during inference. The function is called with the
209
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
209
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
210
210
  callback_steps (`int`, *optional*, defaults to 1):
211
211
  The frequency at which the `callback` function is called. If not specified, the callback is called at
212
212
  every step.
@@ -301,7 +301,7 @@ class VQDiffusionPipeline(DiffusionPipeline):
301
301
 
302
302
  return ImagePipelineOutput(images=image)
303
303
 
304
- def truncate(self, log_p_x_0: torch.FloatTensor, truncation_rate: float) -> torch.FloatTensor:
304
+ def truncate(self, log_p_x_0: torch.Tensor, truncation_rate: float) -> torch.Tensor:
305
305
  """
306
306
  Truncates `log_p_x_0` such that for each column vector, the total cumulative probability is `truncation_rate`
307
307
  The lowest probabilities that would increase the cumulative probability above `truncation_rate` are set to
@@ -22,7 +22,7 @@ from typing import Dict, List, Optional, Tuple, Union
22
22
 
23
23
  import torch
24
24
 
25
- from ...models import AutoencoderKL, Transformer2DModel
25
+ from ...models import AutoencoderKL, DiTTransformer2DModel
26
26
  from ...schedulers import KarrasDiffusionSchedulers
27
27
  from ...utils.torch_utils import randn_tensor
28
28
  from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
@@ -36,8 +36,8 @@ class DiTPipeline(DiffusionPipeline):
36
36
  implemented for all pipelines (downloading, saving, running on a particular device, etc.).
37
37
 
38
38
  Parameters:
39
- transformer ([`Transformer2DModel`]):
40
- A class conditioned `Transformer2DModel` to denoise the encoded image latents.
39
+ transformer ([`DiTTransformer2DModel`]):
40
+ A class conditioned `DiTTransformer2DModel` to denoise the encoded image latents.
41
41
  vae ([`AutoencoderKL`]):
42
42
  Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
43
43
  scheduler ([`DDIMScheduler`]):
@@ -48,7 +48,7 @@ class DiTPipeline(DiffusionPipeline):
48
48
 
49
49
  def __init__(
50
50
  self,
51
- transformer: Transformer2DModel,
51
+ transformer: DiTTransformer2DModel,
52
52
  vae: AutoencoderKL,
53
53
  scheduler: KarrasDiffusionSchedulers,
54
54
  id2label: Optional[Dict[int, str]] = None,
@@ -227,6 +227,9 @@ class DiTPipeline(DiffusionPipeline):
227
227
  if output_type == "pil":
228
228
  samples = self.numpy_to_pil(samples)
229
229
 
230
+ # Offload all models
231
+ self.maybe_free_model_hooks()
232
+
230
233
  if not return_dict:
231
234
  return (samples,)
232
235
 
@@ -41,20 +41,20 @@ class FreeInitMixin:
41
41
  num_iters (`int`, *optional*, defaults to `3`):
42
42
  Number of FreeInit noise re-initialization iterations.
43
43
  use_fast_sampling (`bool`, *optional*, defaults to `False`):
44
- Whether or not to speedup sampling procedure at the cost of probably lower quality results. Enables
45
- the "Coarse-to-Fine Sampling" strategy, as mentioned in the paper, if set to `True`.
44
+ Whether or not to speedup sampling procedure at the cost of probably lower quality results. Enables the
45
+ "Coarse-to-Fine Sampling" strategy, as mentioned in the paper, if set to `True`.
46
46
  method (`str`, *optional*, defaults to `butterworth`):
47
- Must be one of `butterworth`, `ideal` or `gaussian` to use as the filtering method for the
48
- FreeInit low pass filter.
47
+ Must be one of `butterworth`, `ideal` or `gaussian` to use as the filtering method for the FreeInit low
48
+ pass filter.
49
49
  order (`int`, *optional*, defaults to `4`):
50
50
  Order of the filter used in `butterworth` method. Larger values lead to `ideal` method behaviour
51
51
  whereas lower values lead to `gaussian` method behaviour.
52
52
  spatial_stop_frequency (`float`, *optional*, defaults to `0.25`):
53
- Normalized stop frequency for spatial dimensions. Must be between 0 to 1. Referred to as `d_s` in
54
- the original implementation.
53
+ Normalized stop frequency for spatial dimensions. Must be between 0 to 1. Referred to as `d_s` in the
54
+ original implementation.
55
55
  temporal_stop_frequency (`float`, *optional*, defaults to `0.25`):
56
- Normalized stop frequency for temporal dimensions. Must be between 0 to 1. Referred to as `d_t` in
57
- the original implementation.
56
+ Normalized stop frequency for temporal dimensions. Must be between 0 to 1. Referred to as `d_t` in the
57
+ original implementation.
58
58
  """
59
59
  self._free_init_num_iters = num_iters
60
60
  self._free_init_use_fast_sampling = use_fast_sampling
@@ -146,39 +146,40 @@ class FreeInitMixin:
146
146
  ):
147
147
  if free_init_iteration == 0:
148
148
  self._free_init_initial_noise = latents.detach().clone()
149
- return latents, self.scheduler.timesteps
150
-
151
- latent_shape = latents.shape
152
-
153
- free_init_filter_shape = (1, *latent_shape[1:])
154
- free_init_freq_filter = self._get_free_init_freq_filter(
155
- shape=free_init_filter_shape,
156
- device=device,
157
- filter_type=self._free_init_method,
158
- order=self._free_init_order,
159
- spatial_stop_frequency=self._free_init_spatial_stop_frequency,
160
- temporal_stop_frequency=self._free_init_temporal_stop_frequency,
161
- )
162
-
163
- current_diffuse_timestep = self.scheduler.config.num_train_timesteps - 1
164
- diffuse_timesteps = torch.full((latent_shape[0],), current_diffuse_timestep).long()
165
-
166
- z_t = self.scheduler.add_noise(
167
- original_samples=latents, noise=self._free_init_initial_noise, timesteps=diffuse_timesteps.to(device)
168
- ).to(dtype=torch.float32)
169
-
170
- z_rand = randn_tensor(
171
- shape=latent_shape,
172
- generator=generator,
173
- device=device,
174
- dtype=torch.float32,
175
- )
176
- latents = self._apply_freq_filter(z_t, z_rand, low_pass_filter=free_init_freq_filter)
177
- latents = latents.to(dtype)
149
+ else:
150
+ latent_shape = latents.shape
151
+
152
+ free_init_filter_shape = (1, *latent_shape[1:])
153
+ free_init_freq_filter = self._get_free_init_freq_filter(
154
+ shape=free_init_filter_shape,
155
+ device=device,
156
+ filter_type=self._free_init_method,
157
+ order=self._free_init_order,
158
+ spatial_stop_frequency=self._free_init_spatial_stop_frequency,
159
+ temporal_stop_frequency=self._free_init_temporal_stop_frequency,
160
+ )
161
+
162
+ current_diffuse_timestep = self.scheduler.config.num_train_timesteps - 1
163
+ diffuse_timesteps = torch.full((latent_shape[0],), current_diffuse_timestep).long()
164
+
165
+ z_t = self.scheduler.add_noise(
166
+ original_samples=latents, noise=self._free_init_initial_noise, timesteps=diffuse_timesteps.to(device)
167
+ ).to(dtype=torch.float32)
168
+
169
+ z_rand = randn_tensor(
170
+ shape=latent_shape,
171
+ generator=generator,
172
+ device=device,
173
+ dtype=torch.float32,
174
+ )
175
+ latents = self._apply_freq_filter(z_t, z_rand, low_pass_filter=free_init_freq_filter)
176
+ latents = latents.to(dtype)
178
177
 
179
178
  # Coarse-to-Fine Sampling for faster inference (can lead to lower quality)
180
179
  if self._free_init_use_fast_sampling:
181
- num_inference_steps = int(num_inference_steps / self._free_init_num_iters * (free_init_iteration + 1))
180
+ num_inference_steps = max(
181
+ 1, int(num_inference_steps / self._free_init_num_iters * (free_init_iteration + 1))
182
+ )
182
183
  self.scheduler.set_timesteps(num_inference_steps, device=device)
183
184
 
184
185
  return latents, self.scheduler.timesteps
@@ -0,0 +1,48 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from ...utils import (
4
+ DIFFUSERS_SLOW_IMPORT,
5
+ OptionalDependencyNotAvailable,
6
+ _LazyModule,
7
+ get_objects_from_module,
8
+ is_torch_available,
9
+ is_transformers_available,
10
+ )
11
+
12
+
13
+ _dummy_objects = {}
14
+ _import_structure = {}
15
+
16
+
17
+ try:
18
+ if not (is_transformers_available() and is_torch_available()):
19
+ raise OptionalDependencyNotAvailable()
20
+ except OptionalDependencyNotAvailable:
21
+ from ...utils import dummy_torch_and_transformers_objects # noqa F403
22
+
23
+ _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
24
+ else:
25
+ _import_structure["pipeline_hunyuandit"] = ["HunyuanDiTPipeline"]
26
+
27
+ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
28
+ try:
29
+ if not (is_transformers_available() and is_torch_available()):
30
+ raise OptionalDependencyNotAvailable()
31
+
32
+ except OptionalDependencyNotAvailable:
33
+ from ...utils.dummy_torch_and_transformers_objects import *
34
+ else:
35
+ from .pipeline_hunyuandit import HunyuanDiTPipeline
36
+
37
+ else:
38
+ import sys
39
+
40
+ sys.modules[__name__] = _LazyModule(
41
+ __name__,
42
+ globals()["__file__"],
43
+ _import_structure,
44
+ module_spec=__spec__,
45
+ )
46
+
47
+ for name, value in _dummy_objects.items():
48
+ setattr(sys.modules[__name__], name, value)