diffusers 0.27.2__py3-none-any.whl → 0.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. diffusers/__init__.py +18 -1
  2. diffusers/callbacks.py +156 -0
  3. diffusers/commands/env.py +110 -6
  4. diffusers/configuration_utils.py +16 -11
  5. diffusers/dependency_versions_table.py +2 -1
  6. diffusers/image_processor.py +158 -45
  7. diffusers/loaders/__init__.py +2 -5
  8. diffusers/loaders/autoencoder.py +4 -4
  9. diffusers/loaders/controlnet.py +4 -4
  10. diffusers/loaders/ip_adapter.py +80 -22
  11. diffusers/loaders/lora.py +134 -20
  12. diffusers/loaders/lora_conversion_utils.py +46 -43
  13. diffusers/loaders/peft.py +4 -3
  14. diffusers/loaders/single_file.py +401 -170
  15. diffusers/loaders/single_file_model.py +290 -0
  16. diffusers/loaders/single_file_utils.py +616 -672
  17. diffusers/loaders/textual_inversion.py +41 -20
  18. diffusers/loaders/unet.py +168 -115
  19. diffusers/loaders/unet_loader_utils.py +163 -0
  20. diffusers/models/__init__.py +2 -0
  21. diffusers/models/activations.py +11 -3
  22. diffusers/models/attention.py +10 -11
  23. diffusers/models/attention_processor.py +367 -148
  24. diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
  25. diffusers/models/autoencoders/autoencoder_kl.py +18 -19
  26. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
  27. diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
  28. diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
  29. diffusers/models/autoencoders/vae.py +23 -24
  30. diffusers/models/controlnet.py +12 -9
  31. diffusers/models/controlnet_flax.py +4 -4
  32. diffusers/models/controlnet_xs.py +1915 -0
  33. diffusers/models/downsampling.py +17 -18
  34. diffusers/models/embeddings.py +147 -24
  35. diffusers/models/model_loading_utils.py +149 -0
  36. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  37. diffusers/models/modeling_flax_utils.py +4 -4
  38. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  39. diffusers/models/modeling_utils.py +118 -98
  40. diffusers/models/resnet.py +18 -23
  41. diffusers/models/transformer_temporal.py +3 -3
  42. diffusers/models/transformers/dual_transformer_2d.py +4 -4
  43. diffusers/models/transformers/prior_transformer.py +7 -7
  44. diffusers/models/transformers/t5_film_transformer.py +17 -19
  45. diffusers/models/transformers/transformer_2d.py +272 -156
  46. diffusers/models/transformers/transformer_temporal.py +10 -10
  47. diffusers/models/unets/unet_1d.py +5 -5
  48. diffusers/models/unets/unet_1d_blocks.py +29 -29
  49. diffusers/models/unets/unet_2d.py +6 -6
  50. diffusers/models/unets/unet_2d_blocks.py +137 -128
  51. diffusers/models/unets/unet_2d_condition.py +19 -15
  52. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  53. diffusers/models/unets/unet_3d_blocks.py +79 -77
  54. diffusers/models/unets/unet_3d_condition.py +13 -9
  55. diffusers/models/unets/unet_i2vgen_xl.py +14 -13
  56. diffusers/models/unets/unet_kandinsky3.py +1 -1
  57. diffusers/models/unets/unet_motion_model.py +114 -14
  58. diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
  59. diffusers/models/unets/unet_stable_cascade.py +16 -13
  60. diffusers/models/upsampling.py +17 -20
  61. diffusers/models/vq_model.py +16 -15
  62. diffusers/pipelines/__init__.py +25 -3
  63. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  64. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  65. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  66. diffusers/pipelines/animatediff/__init__.py +2 -0
  67. diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
  68. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
  69. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
  70. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  71. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  72. diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
  73. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
  74. diffusers/pipelines/auto_pipeline.py +21 -17
  75. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  76. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
  77. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  78. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  79. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
  80. diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
  81. diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
  82. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  83. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
  84. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
  85. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
  86. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
  87. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
  88. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  89. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
  90. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
  91. diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
  92. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
  93. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
  94. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
  95. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
  96. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
  97. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  98. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
  99. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
  100. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  101. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  102. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
  103. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -18
  104. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
  105. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
  106. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
  107. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
  108. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  109. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  110. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  111. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  112. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  113. diffusers/pipelines/dit/pipeline_dit.py +3 -0
  114. diffusers/pipelines/free_init_utils.py +39 -38
  115. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  116. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  117. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
  118. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  119. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  120. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  121. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  122. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
  123. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  124. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  125. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  126. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  127. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  128. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  129. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  130. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
  131. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
  132. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
  133. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
  134. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
  135. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
  136. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
  137. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  138. diffusers/pipelines/marigold/__init__.py +50 -0
  139. diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
  140. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  141. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  142. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  143. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  144. diffusers/pipelines/pia/pipeline_pia.py +39 -125
  145. diffusers/pipelines/pipeline_flax_utils.py +4 -4
  146. diffusers/pipelines/pipeline_loading_utils.py +268 -23
  147. diffusers/pipelines/pipeline_utils.py +266 -37
  148. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  149. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
  150. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
  151. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
  152. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  153. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  154. diffusers/pipelines/shap_e/renderer.py +1 -1
  155. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +18 -18
  156. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  157. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
  158. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  159. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
  160. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  161. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  162. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
  163. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
  164. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  165. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
  166. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
  167. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
  168. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
  169. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
  170. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
  171. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
  172. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  173. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
  174. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -39
  175. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
  176. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
  177. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
  178. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
  179. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
  180. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
  181. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  182. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  183. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
  184. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
  185. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
  186. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
  187. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
  188. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  189. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  190. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
  191. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
  192. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  193. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
  194. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
  195. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
  196. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
  197. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  198. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  199. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  200. diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
  201. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
  202. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  203. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  204. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
  205. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  206. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  207. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
  208. diffusers/schedulers/__init__.py +2 -2
  209. diffusers/schedulers/deprecated/__init__.py +1 -1
  210. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  211. diffusers/schedulers/scheduling_amused.py +5 -5
  212. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  213. diffusers/schedulers/scheduling_consistency_models.py +20 -26
  214. diffusers/schedulers/scheduling_ddim.py +22 -24
  215. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  216. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  217. diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
  218. diffusers/schedulers/scheduling_ddpm.py +20 -22
  219. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  220. diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
  221. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  222. diffusers/schedulers/scheduling_deis_multistep.py +42 -42
  223. diffusers/schedulers/scheduling_dpmsolver_multistep.py +103 -77
  224. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  225. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
  226. diffusers/schedulers/scheduling_dpmsolver_sde.py +23 -23
  227. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +86 -65
  228. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +75 -54
  229. diffusers/schedulers/scheduling_edm_euler.py +50 -31
  230. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +23 -29
  231. diffusers/schedulers/scheduling_euler_discrete.py +160 -68
  232. diffusers/schedulers/scheduling_heun_discrete.py +57 -39
  233. diffusers/schedulers/scheduling_ipndm.py +8 -8
  234. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +19 -19
  235. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +19 -19
  236. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  237. diffusers/schedulers/scheduling_lcm.py +21 -23
  238. diffusers/schedulers/scheduling_lms_discrete.py +24 -26
  239. diffusers/schedulers/scheduling_pndm.py +20 -20
  240. diffusers/schedulers/scheduling_repaint.py +20 -20
  241. diffusers/schedulers/scheduling_sasolver.py +55 -54
  242. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  243. diffusers/schedulers/scheduling_tcd.py +39 -30
  244. diffusers/schedulers/scheduling_unclip.py +15 -15
  245. diffusers/schedulers/scheduling_unipc_multistep.py +111 -41
  246. diffusers/schedulers/scheduling_utils.py +14 -5
  247. diffusers/schedulers/scheduling_utils_flax.py +3 -3
  248. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  249. diffusers/training_utils.py +56 -1
  250. diffusers/utils/__init__.py +7 -0
  251. diffusers/utils/doc_utils.py +1 -0
  252. diffusers/utils/dummy_pt_objects.py +30 -0
  253. diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
  254. diffusers/utils/dynamic_modules_utils.py +24 -11
  255. diffusers/utils/hub_utils.py +3 -2
  256. diffusers/utils/import_utils.py +91 -0
  257. diffusers/utils/loading_utils.py +2 -2
  258. diffusers/utils/logging.py +1 -1
  259. diffusers/utils/peft_utils.py +32 -5
  260. diffusers/utils/state_dict_utils.py +11 -2
  261. diffusers/utils/testing_utils.py +71 -6
  262. diffusers/utils/torch_utils.py +1 -0
  263. diffusers/video_processor.py +113 -0
  264. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/METADATA +47 -47
  265. diffusers-0.28.0.dist-info/RECORD +414 -0
  266. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/WHEEL +1 -1
  267. diffusers-0.27.2.dist-info/RECORD +0 -399
  268. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
  269. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
  270. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -166,8 +166,8 @@ class StableUnCLIPImg2ImgPipeline(
166
166
  num_images_per_prompt,
167
167
  do_classifier_free_guidance,
168
168
  negative_prompt=None,
169
- prompt_embeds: Optional[torch.FloatTensor] = None,
170
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
169
+ prompt_embeds: Optional[torch.Tensor] = None,
170
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
171
171
  lora_scale: Optional[float] = None,
172
172
  **kwargs,
173
173
  ):
@@ -254,8 +254,8 @@ class StableUnCLIPImg2ImgPipeline(
254
254
  num_images_per_prompt,
255
255
  do_classifier_free_guidance,
256
256
  negative_prompt=None,
257
- prompt_embeds: Optional[torch.FloatTensor] = None,
258
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
257
+ prompt_embeds: Optional[torch.Tensor] = None,
258
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
259
259
  lora_scale: Optional[float] = None,
260
260
  clip_skip: Optional[int] = None,
261
261
  ):
@@ -275,10 +275,10 @@ class StableUnCLIPImg2ImgPipeline(
275
275
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
276
276
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
277
277
  less than `1`).
278
- prompt_embeds (`torch.FloatTensor`, *optional*):
278
+ prompt_embeds (`torch.Tensor`, *optional*):
279
279
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
280
280
  provided, text embeddings will be generated from `prompt` input argument.
281
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
281
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
282
282
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
283
283
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
284
284
  argument.
@@ -537,13 +537,18 @@ class StableUnCLIPImg2ImgPipeline(
537
537
  and not isinstance(image, list)
538
538
  ):
539
539
  raise ValueError(
540
- "`image` has to be of type `torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
540
+ "`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
541
541
  f" {type(image)}"
542
542
  )
543
543
 
544
544
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
545
545
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
546
- shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
546
+ shape = (
547
+ batch_size,
548
+ num_channels_latents,
549
+ int(height) // self.vae_scale_factor,
550
+ int(width) // self.vae_scale_factor,
551
+ )
547
552
  if isinstance(generator, list) and len(generator) != batch_size:
548
553
  raise ValueError(
549
554
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -564,7 +569,7 @@ class StableUnCLIPImg2ImgPipeline(
564
569
  self,
565
570
  image_embeds: torch.Tensor,
566
571
  noise_level: int,
567
- noise: Optional[torch.FloatTensor] = None,
572
+ noise: Optional[torch.Tensor] = None,
568
573
  generator: Optional[torch.Generator] = None,
569
574
  ):
570
575
  """
@@ -610,7 +615,7 @@ class StableUnCLIPImg2ImgPipeline(
610
615
  @replace_example_docstring(EXAMPLE_DOC_STRING)
611
616
  def __call__(
612
617
  self,
613
- image: Union[torch.FloatTensor, PIL.Image.Image] = None,
618
+ image: Union[torch.Tensor, PIL.Image.Image] = None,
614
619
  prompt: Union[str, List[str]] = None,
615
620
  height: Optional[int] = None,
616
621
  width: Optional[int] = None,
@@ -620,16 +625,16 @@ class StableUnCLIPImg2ImgPipeline(
620
625
  num_images_per_prompt: Optional[int] = 1,
621
626
  eta: float = 0.0,
622
627
  generator: Optional[torch.Generator] = None,
623
- latents: Optional[torch.FloatTensor] = None,
624
- prompt_embeds: Optional[torch.FloatTensor] = None,
625
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
628
+ latents: Optional[torch.Tensor] = None,
629
+ prompt_embeds: Optional[torch.Tensor] = None,
630
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
626
631
  output_type: Optional[str] = "pil",
627
632
  return_dict: bool = True,
628
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
633
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
629
634
  callback_steps: int = 1,
630
635
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
631
636
  noise_level: int = 0,
632
- image_embeds: Optional[torch.FloatTensor] = None,
637
+ image_embeds: Optional[torch.Tensor] = None,
633
638
  clip_skip: Optional[int] = None,
634
639
  ):
635
640
  r"""
@@ -639,7 +644,7 @@ class StableUnCLIPImg2ImgPipeline(
639
644
  prompt (`str` or `List[str]`, *optional*):
640
645
  The prompt or prompts to guide the image generation. If not defined, either `prompt_embeds` will be
641
646
  used or prompt is initialized to `""`.
642
- image (`torch.FloatTensor` or `PIL.Image.Image`):
647
+ image (`torch.Tensor` or `PIL.Image.Image`):
643
648
  `Image` or tensor representing an image batch. The image is encoded to its CLIP embedding which the
644
649
  `unet` is conditioned on. The image is _not_ encoded by the `vae` and then used as the latents in the
645
650
  denoising process like it is in the standard Stable Diffusion text-guided image variation process.
@@ -664,14 +669,14 @@ class StableUnCLIPImg2ImgPipeline(
664
669
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
665
670
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
666
671
  generation deterministic.
667
- latents (`torch.FloatTensor`, *optional*):
672
+ latents (`torch.Tensor`, *optional*):
668
673
  Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
669
674
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
670
675
  tensor is generated by sampling using the supplied random `generator`.
671
- prompt_embeds (`torch.FloatTensor`, *optional*):
676
+ prompt_embeds (`torch.Tensor`, *optional*):
672
677
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
673
678
  provided, text embeddings are generated from the `prompt` input argument.
674
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
679
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
675
680
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
676
681
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
677
682
  output_type (`str`, *optional*, defaults to `"pil"`):
@@ -680,7 +685,7 @@ class StableUnCLIPImg2ImgPipeline(
680
685
  Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
681
686
  callback (`Callable`, *optional*):
682
687
  A function that calls every `callback_steps` steps during inference. The function is called with the
683
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
688
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
684
689
  callback_steps (`int`, *optional*, defaults to 1):
685
690
  The frequency at which the `callback` function is called. If not specified, the callback is called at
686
691
  every step.
@@ -690,7 +695,7 @@ class StableUnCLIPImg2ImgPipeline(
690
695
  noise_level (`int`, *optional*, defaults to `0`):
691
696
  The amount of noise to add to the image embeddings. A higher `noise_level` increases the variance in
692
697
  the final un-noised images. See [`StableUnCLIPPipeline.noise_image_embeddings`] for more details.
693
- image_embeds (`torch.FloatTensor`, *optional*):
698
+ image_embeds (`torch.Tensor`, *optional*):
694
699
  Pre-generated CLIP embeddings to condition the `unet` on. These latents are not used in the denoising
695
700
  process. If you want to provide pre-generated latents, pass them to `__call__` as `latents`.
696
701
  clip_skip (`int`, *optional*):
@@ -781,16 +786,17 @@ class StableUnCLIPImg2ImgPipeline(
781
786
 
782
787
  # 6. Prepare latent variables
783
788
  num_channels_latents = self.unet.config.in_channels
784
- latents = self.prepare_latents(
785
- batch_size=batch_size,
786
- num_channels_latents=num_channels_latents,
787
- height=height,
788
- width=width,
789
- dtype=prompt_embeds.dtype,
790
- device=device,
791
- generator=generator,
792
- latents=latents,
793
- )
789
+ if latents is None:
790
+ latents = self.prepare_latents(
791
+ batch_size=batch_size,
792
+ num_channels_latents=num_channels_latents,
793
+ height=height,
794
+ width=width,
795
+ dtype=prompt_embeds.dtype,
796
+ device=device,
797
+ generator=generator,
798
+ latents=latents,
799
+ )
794
800
 
795
801
  # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
796
802
  extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
@@ -31,6 +31,7 @@ def cosine_distance(image_embeds, text_embeds):
31
31
 
32
32
  class StableDiffusionSafetyChecker(PreTrainedModel):
33
33
  config_class = CLIPConfig
34
+ main_input_name = "clip_input"
34
35
 
35
36
  _no_split_modules = ["CLIPEncoderLayer"]
36
37
 
@@ -99,7 +100,7 @@ class StableDiffusionSafetyChecker(PreTrainedModel):
99
100
  return images, has_nsfw_concepts
100
101
 
101
102
  @torch.no_grad()
102
- def forward_onnx(self, clip_input: torch.FloatTensor, images: torch.FloatTensor):
103
+ def forward_onnx(self, clip_input: torch.Tensor, images: torch.Tensor):
103
104
  pooled_output = self.vision_model(clip_input)[1] # pooled_output
104
105
  image_embeds = self.visual_projection(pooled_output)
105
106
 
@@ -254,8 +254,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
254
254
  num_images_per_prompt,
255
255
  do_classifier_free_guidance,
256
256
  negative_prompt=None,
257
- prompt_embeds: Optional[torch.FloatTensor] = None,
258
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
257
+ prompt_embeds: Optional[torch.Tensor] = None,
258
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
259
259
  lora_scale: Optional[float] = None,
260
260
  **kwargs,
261
261
  ):
@@ -287,8 +287,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
287
287
  num_images_per_prompt,
288
288
  do_classifier_free_guidance,
289
289
  negative_prompt=None,
290
- prompt_embeds: Optional[torch.FloatTensor] = None,
291
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
290
+ prompt_embeds: Optional[torch.Tensor] = None,
291
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
292
292
  lora_scale: Optional[float] = None,
293
293
  clip_skip: Optional[int] = None,
294
294
  ):
@@ -308,10 +308,10 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
308
308
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
309
309
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
310
310
  less than `1`).
311
- prompt_embeds (`torch.FloatTensor`, *optional*):
311
+ prompt_embeds (`torch.Tensor`, *optional*):
312
312
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
313
313
  provided, text embeddings will be generated from `prompt` input argument.
314
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
314
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
315
315
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
316
316
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
317
317
  argument.
@@ -581,7 +581,12 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
581
581
 
582
582
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
583
583
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
584
- shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
584
+ shape = (
585
+ batch_size,
586
+ num_channels_latents,
587
+ int(height) // self.vae_scale_factor,
588
+ int(width) // self.vae_scale_factor,
589
+ )
585
590
  if isinstance(generator, list) and len(generator) != batch_size:
586
591
  raise ValueError(
587
592
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -741,12 +746,12 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
741
746
  num_images_per_prompt: int = 1,
742
747
  eta: float = 0.0,
743
748
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
744
- latents: Optional[torch.FloatTensor] = None,
745
- prompt_embeds: Optional[torch.FloatTensor] = None,
746
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
749
+ latents: Optional[torch.Tensor] = None,
750
+ prompt_embeds: Optional[torch.Tensor] = None,
751
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
747
752
  output_type: Optional[str] = "pil",
748
753
  return_dict: bool = True,
749
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
754
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
750
755
  callback_steps: int = 1,
751
756
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
752
757
  max_iter_to_alter: int = 25,
@@ -784,14 +789,14 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
784
789
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
785
790
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
786
791
  generation deterministic.
787
- latents (`torch.FloatTensor`, *optional*):
792
+ latents (`torch.Tensor`, *optional*):
788
793
  Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
789
794
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
790
795
  tensor is generated by sampling using the supplied random `generator`.
791
- prompt_embeds (`torch.FloatTensor`, *optional*):
796
+ prompt_embeds (`torch.Tensor`, *optional*):
792
797
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
793
798
  provided, text embeddings are generated from the `prompt` input argument.
794
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
799
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
795
800
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
796
801
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
797
802
  output_type (`str`, *optional*, defaults to `"pil"`):
@@ -801,7 +806,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
801
806
  plain tuple.
802
807
  callback (`Callable`, *optional*):
803
808
  A function that calls every `callback_steps` steps during inference. The function is called with the
804
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
809
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
805
810
  callback_steps (`int`, *optional*, defaults to 1):
806
811
  The frequency at which the `callback` function is called. If not specified, the callback is called at
807
812
  every step.
@@ -902,6 +907,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
902
907
  if attn_res is None:
903
908
  attn_res = int(np.ceil(width / 32)), int(np.ceil(height / 32))
904
909
  self.attention_store = AttentionStore(attn_res)
910
+ original_attn_proc = self.unet.attn_processors
905
911
  self.register_attention_control()
906
912
 
907
913
  # default config for step size from original repo
@@ -1016,6 +1022,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
1016
1022
 
1017
1023
  image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
1018
1024
  self.maybe_free_model_hooks()
1025
+ # make sure to set the original attention processors back
1026
+ self.unet.set_attn_processor(original_attn_proc)
1019
1027
 
1020
1028
  if not return_dict:
1021
1029
  return (image, has_nsfw_concept)
@@ -53,7 +53,7 @@ class DiffEditInversionPipelineOutput(BaseOutput):
53
53
  Output class for Stable Diffusion pipelines.
54
54
 
55
55
  Args:
56
- latents (`torch.FloatTensor`)
56
+ latents (`torch.Tensor`)
57
57
  inverted latents tensor
58
58
  images (`List[PIL.Image.Image]` or `np.ndarray`)
59
59
  List of denoised PIL images of length `num_timesteps * batch_size` or numpy array of shape `(num_timesteps,
@@ -61,7 +61,7 @@ class DiffEditInversionPipelineOutput(BaseOutput):
61
61
  diffusion pipeline.
62
62
  """
63
63
 
64
- latents: torch.FloatTensor
64
+ latents: torch.Tensor
65
65
  images: Union[List[PIL.Image.Image], np.ndarray]
66
66
 
67
67
 
@@ -185,7 +185,7 @@ def preprocess(image):
185
185
  def preprocess_mask(mask, batch_size: int = 1):
186
186
  if not isinstance(mask, torch.Tensor):
187
187
  # preprocess mask
188
- if isinstance(mask, PIL.Image.Image) or isinstance(mask, np.ndarray):
188
+ if isinstance(mask, (PIL.Image.Image, np.ndarray)):
189
189
  mask = [mask]
190
190
 
191
191
  if isinstance(mask, list):
@@ -381,8 +381,8 @@ class StableDiffusionDiffEditPipeline(
381
381
  num_images_per_prompt,
382
382
  do_classifier_free_guidance,
383
383
  negative_prompt=None,
384
- prompt_embeds: Optional[torch.FloatTensor] = None,
385
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
384
+ prompt_embeds: Optional[torch.Tensor] = None,
385
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
386
386
  lora_scale: Optional[float] = None,
387
387
  **kwargs,
388
388
  ):
@@ -414,8 +414,8 @@ class StableDiffusionDiffEditPipeline(
414
414
  num_images_per_prompt,
415
415
  do_classifier_free_guidance,
416
416
  negative_prompt=None,
417
- prompt_embeds: Optional[torch.FloatTensor] = None,
418
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
417
+ prompt_embeds: Optional[torch.Tensor] = None,
418
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
419
419
  lora_scale: Optional[float] = None,
420
420
  clip_skip: Optional[int] = None,
421
421
  ):
@@ -435,10 +435,10 @@ class StableDiffusionDiffEditPipeline(
435
435
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
436
436
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
437
437
  less than `1`).
438
- prompt_embeds (`torch.FloatTensor`, *optional*):
438
+ prompt_embeds (`torch.Tensor`, *optional*):
439
439
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
440
440
  provided, text embeddings will be generated from `prompt` input argument.
441
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
441
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
442
442
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
443
443
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
444
444
  argument.
@@ -740,7 +740,12 @@ class StableDiffusionDiffEditPipeline(
740
740
 
741
741
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
742
742
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
743
- shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
743
+ shape = (
744
+ batch_size,
745
+ num_channels_latents,
746
+ int(height) // self.vae_scale_factor,
747
+ int(width) // self.vae_scale_factor,
748
+ )
744
749
  if isinstance(generator, list) and len(generator) != batch_size:
745
750
  raise ValueError(
746
751
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -826,15 +831,15 @@ class StableDiffusionDiffEditPipeline(
826
831
  @replace_example_docstring(EXAMPLE_DOC_STRING)
827
832
  def generate_mask(
828
833
  self,
829
- image: Union[torch.FloatTensor, PIL.Image.Image] = None,
834
+ image: Union[torch.Tensor, PIL.Image.Image] = None,
830
835
  target_prompt: Optional[Union[str, List[str]]] = None,
831
836
  target_negative_prompt: Optional[Union[str, List[str]]] = None,
832
- target_prompt_embeds: Optional[torch.FloatTensor] = None,
833
- target_negative_prompt_embeds: Optional[torch.FloatTensor] = None,
837
+ target_prompt_embeds: Optional[torch.Tensor] = None,
838
+ target_negative_prompt_embeds: Optional[torch.Tensor] = None,
834
839
  source_prompt: Optional[Union[str, List[str]]] = None,
835
840
  source_negative_prompt: Optional[Union[str, List[str]]] = None,
836
- source_prompt_embeds: Optional[torch.FloatTensor] = None,
837
- source_negative_prompt_embeds: Optional[torch.FloatTensor] = None,
841
+ source_prompt_embeds: Optional[torch.Tensor] = None,
842
+ source_negative_prompt_embeds: Optional[torch.Tensor] = None,
838
843
  num_maps_per_mask: Optional[int] = 10,
839
844
  mask_encode_strength: Optional[float] = 0.5,
840
845
  mask_thresholding_ratio: Optional[float] = 3.0,
@@ -856,10 +861,10 @@ class StableDiffusionDiffEditPipeline(
856
861
  target_negative_prompt (`str` or `List[str]`, *optional*):
857
862
  The prompt or prompts to guide what to not include in image generation. If not defined, you need to
858
863
  pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
859
- target_prompt_embeds (`torch.FloatTensor`, *optional*):
864
+ target_prompt_embeds (`torch.Tensor`, *optional*):
860
865
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
861
866
  provided, text embeddings are generated from the `prompt` input argument.
862
- target_negative_prompt_embeds (`torch.FloatTensor`, *optional*):
867
+ target_negative_prompt_embeds (`torch.Tensor`, *optional*):
863
868
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
864
869
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
865
870
  source_prompt (`str` or `List[str]`, *optional*):
@@ -868,11 +873,11 @@ class StableDiffusionDiffEditPipeline(
868
873
  source_negative_prompt (`str` or `List[str]`, *optional*):
869
874
  The prompt or prompts to guide semantic mask generation away from using DiffEdit. If not defined, you
870
875
  need to pass `source_negative_prompt_embeds` or `source_image` instead.
871
- source_prompt_embeds (`torch.FloatTensor`, *optional*):
876
+ source_prompt_embeds (`torch.Tensor`, *optional*):
872
877
  Pre-generated text embeddings to guide the semantic mask generation. Can be used to easily tweak text
873
878
  inputs (prompt weighting). If not provided, text embeddings are generated from `source_prompt` input
874
879
  argument.
875
- source_negative_prompt_embeds (`torch.FloatTensor`, *optional*):
880
+ source_negative_prompt_embeds (`torch.Tensor`, *optional*):
876
881
  Pre-generated text embeddings to negatively guide the semantic mask generation. Can be used to easily
877
882
  tweak text inputs (prompt weighting). If not provided, text embeddings are generated from
878
883
  `source_negative_prompt` input argument.
@@ -1046,18 +1051,18 @@ class StableDiffusionDiffEditPipeline(
1046
1051
  def invert(
1047
1052
  self,
1048
1053
  prompt: Optional[Union[str, List[str]]] = None,
1049
- image: Union[torch.FloatTensor, PIL.Image.Image] = None,
1054
+ image: Union[torch.Tensor, PIL.Image.Image] = None,
1050
1055
  num_inference_steps: int = 50,
1051
1056
  inpaint_strength: float = 0.8,
1052
1057
  guidance_scale: float = 7.5,
1053
1058
  negative_prompt: Optional[Union[str, List[str]]] = None,
1054
1059
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
1055
- prompt_embeds: Optional[torch.FloatTensor] = None,
1056
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
1060
+ prompt_embeds: Optional[torch.Tensor] = None,
1061
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
1057
1062
  decode_latents: bool = False,
1058
1063
  output_type: Optional[str] = "pil",
1059
1064
  return_dict: bool = True,
1060
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
1065
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
1061
1066
  callback_steps: Optional[int] = 1,
1062
1067
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
1063
1068
  lambda_auto_corr: float = 20.0,
@@ -1090,10 +1095,10 @@ class StableDiffusionDiffEditPipeline(
1090
1095
  generator (`torch.Generator`, *optional*):
1091
1096
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
1092
1097
  generation deterministic.
1093
- prompt_embeds (`torch.FloatTensor`, *optional*):
1098
+ prompt_embeds (`torch.Tensor`, *optional*):
1094
1099
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
1095
1100
  provided, text embeddings are generated from the `prompt` input argument.
1096
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
1101
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
1097
1102
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
1098
1103
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
1099
1104
  decode_latents (`bool`, *optional*, defaults to `False`):
@@ -1106,7 +1111,7 @@ class StableDiffusionDiffEditPipeline(
1106
1111
  plain tuple.
1107
1112
  callback (`Callable`, *optional*):
1108
1113
  A function that calls every `callback_steps` steps during inference. The function is called with the
1109
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
1114
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
1110
1115
  callback_steps (`int`, *optional*, defaults to 1):
1111
1116
  The frequency at which the `callback` function is called. If not specified, the callback is called at
1112
1117
  every step.
@@ -1284,8 +1289,8 @@ class StableDiffusionDiffEditPipeline(
1284
1289
  def __call__(
1285
1290
  self,
1286
1291
  prompt: Optional[Union[str, List[str]]] = None,
1287
- mask_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
1288
- image_latents: Union[torch.FloatTensor, PIL.Image.Image] = None,
1292
+ mask_image: Union[torch.Tensor, PIL.Image.Image] = None,
1293
+ image_latents: Union[torch.Tensor, PIL.Image.Image] = None,
1289
1294
  inpaint_strength: Optional[float] = 0.8,
1290
1295
  num_inference_steps: int = 50,
1291
1296
  guidance_scale: float = 7.5,
@@ -1293,15 +1298,15 @@ class StableDiffusionDiffEditPipeline(
1293
1298
  num_images_per_prompt: Optional[int] = 1,
1294
1299
  eta: float = 0.0,
1295
1300
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
1296
- latents: Optional[torch.FloatTensor] = None,
1297
- prompt_embeds: Optional[torch.FloatTensor] = None,
1298
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
1301
+ latents: Optional[torch.Tensor] = None,
1302
+ prompt_embeds: Optional[torch.Tensor] = None,
1303
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
1299
1304
  output_type: Optional[str] = "pil",
1300
1305
  return_dict: bool = True,
1301
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
1306
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
1302
1307
  callback_steps: int = 1,
1303
1308
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
1304
- clip_ckip: int = None,
1309
+ clip_skip: int = None,
1305
1310
  ):
1306
1311
  r"""
1307
1312
  The call function to the pipeline for generation.
@@ -1314,7 +1319,7 @@ class StableDiffusionDiffEditPipeline(
1314
1319
  repainted, while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a
1315
1320
  single channel (luminance) before use. If it's a tensor, it should contain one color channel (L)
1316
1321
  instead of 3, so the expected shape would be `(B, 1, H, W)`.
1317
- image_latents (`PIL.Image.Image` or `torch.FloatTensor`):
1322
+ image_latents (`PIL.Image.Image` or `torch.Tensor`):
1318
1323
  Partially noised image latents from the inversion process to be used as inputs for image generation.
1319
1324
  inpaint_strength (`float`, *optional*, defaults to 0.8):
1320
1325
  Indicates extent to inpaint the masked area. Must be between 0 and 1. When `inpaint_strength` is 1, the
@@ -1338,14 +1343,14 @@ class StableDiffusionDiffEditPipeline(
1338
1343
  generator (`torch.Generator`, *optional*):
1339
1344
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
1340
1345
  generation deterministic.
1341
- latents (`torch.FloatTensor`, *optional*):
1346
+ latents (`torch.Tensor`, *optional*):
1342
1347
  Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
1343
1348
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
1344
1349
  tensor is generated by sampling using the supplied random `generator`.
1345
- prompt_embeds (`torch.FloatTensor`, *optional*):
1350
+ prompt_embeds (`torch.Tensor`, *optional*):
1346
1351
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
1347
1352
  provided, text embeddings are generated from the `prompt` input argument.
1348
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
1353
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
1349
1354
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
1350
1355
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
1351
1356
  output_type (`str`, *optional*, defaults to `"pil"`):
@@ -1355,7 +1360,7 @@ class StableDiffusionDiffEditPipeline(
1355
1360
  plain tuple.
1356
1361
  callback (`Callable`, *optional*):
1357
1362
  A function that calls every `callback_steps` steps during inference. The function is called with the
1358
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
1363
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
1359
1364
  callback_steps (`int`, *optional*, defaults to 1):
1360
1365
  The frequency at which the `callback` function is called. If not specified, the callback is called at
1361
1366
  every step.
@@ -1423,7 +1428,7 @@ class StableDiffusionDiffEditPipeline(
1423
1428
  prompt_embeds=prompt_embeds,
1424
1429
  negative_prompt_embeds=negative_prompt_embeds,
1425
1430
  lora_scale=text_encoder_lora_scale,
1426
- clip_skip=clip_ckip,
1431
+ clip_skip=clip_skip,
1427
1432
  )
1428
1433
  # For classifier free guidance, we need to do two forward passes.
1429
1434
  # Here we concatenate the unconditional and text embeddings into a single batch