diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. diffusers/__init__.py +18 -1
  2. diffusers/callbacks.py +156 -0
  3. diffusers/commands/env.py +110 -6
  4. diffusers/configuration_utils.py +16 -11
  5. diffusers/dependency_versions_table.py +2 -1
  6. diffusers/image_processor.py +158 -45
  7. diffusers/loaders/__init__.py +2 -5
  8. diffusers/loaders/autoencoder.py +4 -4
  9. diffusers/loaders/controlnet.py +4 -4
  10. diffusers/loaders/ip_adapter.py +80 -22
  11. diffusers/loaders/lora.py +134 -20
  12. diffusers/loaders/lora_conversion_utils.py +46 -43
  13. diffusers/loaders/peft.py +4 -3
  14. diffusers/loaders/single_file.py +401 -170
  15. diffusers/loaders/single_file_model.py +290 -0
  16. diffusers/loaders/single_file_utils.py +616 -672
  17. diffusers/loaders/textual_inversion.py +41 -20
  18. diffusers/loaders/unet.py +168 -115
  19. diffusers/loaders/unet_loader_utils.py +163 -0
  20. diffusers/models/__init__.py +2 -0
  21. diffusers/models/activations.py +11 -3
  22. diffusers/models/attention.py +10 -11
  23. diffusers/models/attention_processor.py +367 -148
  24. diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
  25. diffusers/models/autoencoders/autoencoder_kl.py +18 -19
  26. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
  27. diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
  28. diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
  29. diffusers/models/autoencoders/vae.py +23 -24
  30. diffusers/models/controlnet.py +12 -9
  31. diffusers/models/controlnet_flax.py +4 -4
  32. diffusers/models/controlnet_xs.py +1915 -0
  33. diffusers/models/downsampling.py +17 -18
  34. diffusers/models/embeddings.py +147 -24
  35. diffusers/models/model_loading_utils.py +149 -0
  36. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  37. diffusers/models/modeling_flax_utils.py +4 -4
  38. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  39. diffusers/models/modeling_utils.py +118 -98
  40. diffusers/models/resnet.py +18 -23
  41. diffusers/models/transformer_temporal.py +3 -3
  42. diffusers/models/transformers/dual_transformer_2d.py +4 -4
  43. diffusers/models/transformers/prior_transformer.py +7 -7
  44. diffusers/models/transformers/t5_film_transformer.py +17 -19
  45. diffusers/models/transformers/transformer_2d.py +272 -156
  46. diffusers/models/transformers/transformer_temporal.py +10 -10
  47. diffusers/models/unets/unet_1d.py +5 -5
  48. diffusers/models/unets/unet_1d_blocks.py +29 -29
  49. diffusers/models/unets/unet_2d.py +6 -6
  50. diffusers/models/unets/unet_2d_blocks.py +137 -128
  51. diffusers/models/unets/unet_2d_condition.py +20 -15
  52. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  53. diffusers/models/unets/unet_3d_blocks.py +79 -77
  54. diffusers/models/unets/unet_3d_condition.py +13 -9
  55. diffusers/models/unets/unet_i2vgen_xl.py +14 -13
  56. diffusers/models/unets/unet_kandinsky3.py +1 -1
  57. diffusers/models/unets/unet_motion_model.py +114 -14
  58. diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
  59. diffusers/models/unets/unet_stable_cascade.py +16 -13
  60. diffusers/models/upsampling.py +17 -20
  61. diffusers/models/vq_model.py +16 -15
  62. diffusers/pipelines/__init__.py +25 -3
  63. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  64. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  65. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  66. diffusers/pipelines/animatediff/__init__.py +2 -0
  67. diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
  68. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
  69. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
  70. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  71. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  72. diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
  73. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
  74. diffusers/pipelines/auto_pipeline.py +21 -17
  75. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  76. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
  77. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  78. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  79. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
  80. diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
  81. diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
  82. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  83. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
  84. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
  85. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
  86. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
  87. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
  88. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  89. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
  90. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
  91. diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
  92. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
  93. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
  94. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
  95. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
  96. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
  97. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  98. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
  99. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
  100. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  101. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  102. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
  103. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
  104. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
  105. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
  106. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
  107. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
  108. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  109. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  110. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  111. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  112. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  113. diffusers/pipelines/dit/pipeline_dit.py +3 -0
  114. diffusers/pipelines/free_init_utils.py +39 -38
  115. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  116. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  117. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
  118. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  119. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  120. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  121. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  122. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
  123. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  124. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  125. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  126. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  127. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  128. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  129. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  130. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
  131. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
  132. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
  133. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
  134. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
  135. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
  136. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
  137. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  138. diffusers/pipelines/marigold/__init__.py +50 -0
  139. diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
  140. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  141. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  142. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  143. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  144. diffusers/pipelines/pia/pipeline_pia.py +39 -125
  145. diffusers/pipelines/pipeline_flax_utils.py +4 -4
  146. diffusers/pipelines/pipeline_loading_utils.py +268 -23
  147. diffusers/pipelines/pipeline_utils.py +266 -37
  148. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  149. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
  150. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
  151. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
  152. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  153. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  154. diffusers/pipelines/shap_e/renderer.py +1 -1
  155. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
  156. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  157. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
  158. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  159. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
  160. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  161. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  162. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
  163. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
  164. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  165. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
  166. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
  167. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
  168. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
  169. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
  170. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
  171. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
  172. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  173. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
  174. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
  175. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
  176. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
  177. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
  178. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
  179. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
  180. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
  181. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  182. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  183. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
  184. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
  185. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
  186. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
  187. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
  188. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  189. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  190. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
  191. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
  192. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  193. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
  194. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
  195. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
  196. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
  197. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  198. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  199. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  200. diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
  201. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
  202. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  203. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  204. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
  205. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  206. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  207. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
  208. diffusers/schedulers/__init__.py +2 -2
  209. diffusers/schedulers/deprecated/__init__.py +1 -1
  210. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  211. diffusers/schedulers/scheduling_amused.py +5 -5
  212. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  213. diffusers/schedulers/scheduling_consistency_models.py +23 -25
  214. diffusers/schedulers/scheduling_ddim.py +22 -24
  215. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  216. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  217. diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
  218. diffusers/schedulers/scheduling_ddpm.py +20 -22
  219. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  220. diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
  221. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  222. diffusers/schedulers/scheduling_deis_multistep.py +46 -42
  223. diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
  224. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  225. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
  226. diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
  227. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
  228. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
  229. diffusers/schedulers/scheduling_edm_euler.py +53 -30
  230. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
  231. diffusers/schedulers/scheduling_euler_discrete.py +163 -67
  232. diffusers/schedulers/scheduling_heun_discrete.py +60 -38
  233. diffusers/schedulers/scheduling_ipndm.py +8 -8
  234. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
  235. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
  236. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  237. diffusers/schedulers/scheduling_lcm.py +21 -23
  238. diffusers/schedulers/scheduling_lms_discrete.py +27 -25
  239. diffusers/schedulers/scheduling_pndm.py +20 -20
  240. diffusers/schedulers/scheduling_repaint.py +20 -20
  241. diffusers/schedulers/scheduling_sasolver.py +55 -54
  242. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  243. diffusers/schedulers/scheduling_tcd.py +39 -30
  244. diffusers/schedulers/scheduling_unclip.py +15 -15
  245. diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
  246. diffusers/schedulers/scheduling_utils.py +14 -5
  247. diffusers/schedulers/scheduling_utils_flax.py +3 -3
  248. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  249. diffusers/training_utils.py +56 -1
  250. diffusers/utils/__init__.py +7 -0
  251. diffusers/utils/doc_utils.py +1 -0
  252. diffusers/utils/dummy_pt_objects.py +30 -0
  253. diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
  254. diffusers/utils/dynamic_modules_utils.py +24 -11
  255. diffusers/utils/hub_utils.py +3 -2
  256. diffusers/utils/import_utils.py +91 -0
  257. diffusers/utils/loading_utils.py +2 -2
  258. diffusers/utils/logging.py +1 -1
  259. diffusers/utils/peft_utils.py +32 -5
  260. diffusers/utils/state_dict_utils.py +11 -2
  261. diffusers/utils/testing_utils.py +71 -6
  262. diffusers/utils/torch_utils.py +1 -0
  263. diffusers/video_processor.py +113 -0
  264. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
  265. diffusers-0.28.0.dist-info/RECORD +414 -0
  266. diffusers-0.27.1.dist-info/RECORD +0 -399
  267. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
  268. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
  269. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
  270. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@@ -21,6 +21,7 @@ import torch
 from packaging import version
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 
+from ...callbacks import MultiPipelineCallbacks, PipelineCallback
 from ...configuration_utils import FrozenDict
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
@@ -115,6 +116,7 @@ def retrieve_timesteps(
     num_inference_steps: Optional[int] = None,
     device: Optional[Union[str, torch.device]] = None,
     timesteps: Optional[List[int]] = None,
+    sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
     """
@@ -125,19 +127,23 @@ def retrieve_timesteps(
         scheduler (`SchedulerMixin`):
             The scheduler to get timesteps from.
         num_inference_steps (`int`):
-            The number of diffusion steps used when generating samples with a pre-trained model. If used,
-            `timesteps` must be `None`.
+            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
+            must be `None`.
         device (`str` or `torch.device`, *optional*):
             The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         timesteps (`List[int]`, *optional*):
-            Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
-            timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
-            must be `None`.
+            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
+            `num_inference_steps` and `sigmas` must be `None`.
+        sigmas (`List[float]`, *optional*):
+            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
+            `num_inference_steps` and `timesteps` must be `None`.
 
     Returns:
         `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
         second element is the number of inference steps.
     """
+    if timesteps is not None and sigmas is not None:
+        raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
     if timesteps is not None:
         accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
         if not accepts_timesteps:
@@ -148,6 +154,16 @@ def retrieve_timesteps(
         scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
         timesteps = scheduler.timesteps
         num_inference_steps = len(timesteps)
+    elif sigmas is not None:
+        accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
+        if not accept_sigmas:
+            raise ValueError(
+                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
+                f" sigmas schedules. Please check whether you are using the correct scheduler."
+            )
+        scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
+        timesteps = scheduler.timesteps
+        num_inference_steps = len(timesteps)
     else:
         scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
         timesteps = scheduler.timesteps
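With the branch above in place, callers can hand the pipeline a full noise schedule instead of a step count. A minimal sketch of the new argument at the call level, assuming a scheduler whose `set_timesteps` accepts `sigmas` (EulerDiscreteScheduler gains this in the same release, per the scheduler entries in the file list); the model id and sigma values are illustrative only.

import torch
from PIL import Image
from diffusers import EulerDiscreteScheduler, StableDiffusionImg2ImgPipeline

pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)

init_image = Image.new("RGB", (512, 512), "gray")  # stand-in; use a real image

# A descending noise schedule. Passing `sigmas` together with `timesteps`
# now fails fast with the ValueError introduced above.
image = pipe(
    prompt="a fantasy landscape",
    image=init_image,
    sigmas=[14.6, 6.5, 3.9, 2.7, 1.9, 1.4, 1.0, 0.65, 0.4, 0.15, 0.0],
).images[0]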
@@ -300,8 +316,8 @@ class StableDiffusionImg2ImgPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         **kwargs,
     ):
@@ -333,8 +349,8 @@ class StableDiffusionImg2ImgPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -354,10 +370,10 @@ class StableDiffusionImg2ImgPipeline(
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -767,20 +783,22 @@ class StableDiffusionImg2ImgPipeline(
         return latents
 
     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.Tensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
 
         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.
 
         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
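Only the head of the method body is visible in this hunk. For reference, a self-contained sketch of the sinusoidal embedding it computes, following the VDM code linked in the docstring; the free-function form and names here are ours, not the library's.

import math
import torch

def guidance_scale_embedding(w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32) -> torch.Tensor:
    # Sinusoidal embedding of the (shifted) guidance scale; `w` is 1-D,
    # one entry per sample in the batch.
    assert len(w.shape) == 1
    w = w * 1000.0  # spread the scale into the range used for timesteps
    half_dim = embedding_dim // 2
    emb = math.log(10000.0) / (half_dim - 1)
    emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
    emb = w.to(dtype)[:, None] * emb[None, :]
    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
    if embedding_dim % 2 == 1:
        emb = torch.nn.functional.pad(emb, (0, 1))  # zero-pad odd widths
    assert emb.shape == (w.shape[0], embedding_dim)
    return emb

# The pipelines invoke the method with `guidance_scale - 1` when the UNet has
# `time_cond_proj_dim` set (LCM-style guidance embedding), e.g.:
emb = guidance_scale_embedding(torch.full((4,), 7.5 - 1.0), embedding_dim=256)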
@@ -831,20 +849,23 @@ class StableDiffusionImg2ImgPipeline(
         strength: float = 0.8,
         num_inference_steps: Optional[int] = 50,
         timesteps: List[int] = None,
+        sigmas: List[float] = None,
         guidance_scale: Optional[float] = 7.5,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         num_images_per_prompt: Optional[int] = 1,
         eta: Optional[float] = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         clip_skip: int = None,
-        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
+        callback_on_step_end: Optional[
+            Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
+        ] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
         **kwargs,
     ):
@@ -854,7 +875,7 @@ class StableDiffusionImg2ImgPipeline(
         Args:
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
-            image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
+            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, numpy array or tensor representing an image batch to be used as the starting point. For both
                 numpy array and pytorch tensor, the expected value range is between `[0, 1]` If it's a tensor or a list
                 or tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a
@@ -873,6 +894,10 @@ class StableDiffusionImg2ImgPipeline(
                 Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
                 in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
                 passed will be used. Must be in descending order.
+            sigmas (`List[float]`, *optional*):
+                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
+                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
+                will be used.
             guidance_scale (`float`, *optional*, defaults to 7.5):
                 A higher guidance scale value encourages the model to generate images closely linked to the text
                 `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
@@ -887,18 +912,18 @@ class StableDiffusionImg2ImgPipeline(
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
-            ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
-                Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
-                if `do_classifier_free_guidance` is set to `True`.
-                If not provided, embeddings are computed from the `ip_adapter_image` input argument.
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
+                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
@@ -910,11 +935,11 @@ class StableDiffusionImg2ImgPipeline(
             clip_skip (`int`, *optional*):
                 Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                 the output of the pre-final layer will be used for computing the prompt embeddings.
-            callback_on_step_end (`Callable`, *optional*):
-                A function that calls at the end of each denoising steps during the inference. The function is called
-                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
-                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
-                `callback_on_step_end_tensor_inputs`.
+            callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
+                A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
+                each denoising step during the inference. with the following arguments: `callback_on_step_end(self:
+                DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
+                list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
@@ -945,6 +970,9 @@ class StableDiffusionImg2ImgPipeline(
             "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
         )
 
+        if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
+            callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
+
         # 1. Check inputs. Raise error if not correct
         self.check_inputs(
             prompt,
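The new isinstance check is what lets class-based callbacks carry their own `tensor_inputs`, overriding whatever the caller passed. A sketch of the two styles `__call__` now accepts, assuming the `PipelineCallback` interface added in diffusers/callbacks.py in this release (a `tensor_inputs` class attribute plus a `callback_fn(pipeline, step_index, timestep, callback_kwargs)` hook).

import torch
from diffusers.callbacks import PipelineCallback

def cfg_cutoff(pipe, step_index, timestep, callback_kwargs):
    # Function style: receive the pipeline plus the requested tensors, mutate
    # and return callback_kwargs. Disabling guidance mid-run touches a private
    # attribute, so treat this heuristic as illustrative only.
    if step_index == int(pipe.num_timesteps * 0.5):
        pipe._guidance_scale = 1.0
    return callback_kwargs

class LatentLogger(PipelineCallback):
    # Class style: tensor_inputs declares which tensors the pipeline should
    # hand over; the isinstance check above copies it into
    # callback_on_step_end_tensor_inputs automatically.
    tensor_inputs = ["latents"]

    def callback_fn(self, pipeline, step_index, timestep, callback_kwargs):
        print(step_index, callback_kwargs["latents"].std().item())
        return callback_kwargs

# Either form is passed the same way:
#   pipe(prompt, image=..., callback_on_step_end=LatentLogger()).images[0]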
@@ -1007,7 +1035,9 @@ class StableDiffusionImg2ImgPipeline(
         image = self.image_processor.preprocess(image)
 
         # 5. set timesteps
-        timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
+        timesteps, num_inference_steps = retrieve_timesteps(
+            self.scheduler, num_inference_steps, device, timesteps, sigmas
+        )
         timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
         latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
 
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -21,6 +21,7 @@ import torch
 from packaging import version
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 
+from ...callbacks import MultiPipelineCallbacks, PipelineCallback
 from ...configuration_utils import FrozenDict
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
@@ -179,6 +180,7 @@ def retrieve_timesteps(
     num_inference_steps: Optional[int] = None,
     device: Optional[Union[str, torch.device]] = None,
     timesteps: Optional[List[int]] = None,
+    sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
     """
@@ -189,19 +191,23 @@ def retrieve_timesteps(
         scheduler (`SchedulerMixin`):
             The scheduler to get timesteps from.
         num_inference_steps (`int`):
-            The number of diffusion steps used when generating samples with a pre-trained model. If used,
-            `timesteps` must be `None`.
+            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
+            must be `None`.
         device (`str` or `torch.device`, *optional*):
             The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         timesteps (`List[int]`, *optional*):
-            Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
-            timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
-            must be `None`.
+            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
+            `num_inference_steps` and `sigmas` must be `None`.
+        sigmas (`List[float]`, *optional*):
+            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
+            `num_inference_steps` and `timesteps` must be `None`.
 
     Returns:
         `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
         second element is the number of inference steps.
     """
+    if timesteps is not None and sigmas is not None:
+        raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
     if timesteps is not None:
         accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
         if not accepts_timesteps:
@@ -212,6 +218,16 @@ def retrieve_timesteps(
         scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
         timesteps = scheduler.timesteps
         num_inference_steps = len(timesteps)
+    elif sigmas is not None:
+        accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
+        if not accept_sigmas:
+            raise ValueError(
+                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
+                f" sigmas schedules. Please check whether you are using the correct scheduler."
+            )
+        scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
+        timesteps = scheduler.timesteps
+        num_inference_steps = len(timesteps)
     else:
         scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
         timesteps = scheduler.timesteps
@@ -372,8 +388,8 @@ class StableDiffusionInpaintPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         **kwargs,
     ):
@@ -405,8 +421,8 @@ class StableDiffusionInpaintPipeline(
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         lora_scale: Optional[float] = None,
         clip_skip: Optional[int] = None,
     ):
@@ -426,10 +442,10 @@ class StableDiffusionInpaintPipeline(
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                 less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -795,7 +811,12 @@ class StableDiffusionInpaintPipeline(
         return_noise=False,
         return_image_latents=False,
     ):
-        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size,
+            num_channels_latents,
+            int(height) // self.vae_scale_factor,
+            int(width) // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -909,20 +930,22 @@ class StableDiffusionInpaintPipeline(
         return timesteps, num_inference_steps - t_start
 
     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.Tensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
 
         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.
 
         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
@@ -970,28 +993,31 @@ class StableDiffusionInpaintPipeline(
         prompt: Union[str, List[str]] = None,
         image: PipelineImageInput = None,
         mask_image: PipelineImageInput = None,
-        masked_image_latents: torch.FloatTensor = None,
+        masked_image_latents: torch.Tensor = None,
         height: Optional[int] = None,
         width: Optional[int] = None,
         padding_mask_crop: Optional[int] = None,
         strength: float = 1.0,
         num_inference_steps: int = 50,
         timesteps: List[int] = None,
+        sigmas: List[float] = None,
         guidance_scale: float = 7.5,
         negative_prompt: Optional[Union[str, List[str]]] = None,
        num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         clip_skip: int = None,
-        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
+        callback_on_step_end: Optional[
+            Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
+        ] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
         **kwargs,
     ):
@@ -1001,14 +1027,14 @@ class StableDiffusionInpaintPipeline(
         Args:
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
-            image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
+            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, numpy array or tensor representing an image batch to be inpainted (which parts of the image to
                 be masked out with `mask_image` and repainted according to `prompt`). For both numpy array and pytorch
                 tensor, the expected value range is between `[0, 1]` If it's a tensor or a list or tensors, the
                 expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a list of arrays, the
                 expected shape should be `(B, H, W, C)` or `(H, W, C)` It can also accept image latents as `image`, but
                 if passing latents directly it is not encoded again.
-            mask_image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
+            mask_image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, numpy array or tensor representing an image batch to mask `image`. White pixels in the mask
                 are repainted while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a
                 single channel (luminance) before use. If it's a numpy array or pytorch tensor, it should contain one
@@ -1020,11 +1046,12 @@ class StableDiffusionInpaintPipeline(
             width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                 The width in pixels of the generated image.
             padding_mask_crop (`int`, *optional*, defaults to `None`):
-                The size of margin in the crop to be applied to the image and masking. If `None`, no crop is applied to image and mask_image. If
-                `padding_mask_crop` is not `None`, it will first find a rectangular region with the same aspect ration of the image and
-                contains all masked area, and then expand that area based on `padding_mask_crop`. The image and mask_image will then be cropped based on
-                the expanded area before resizing to the original image size for inpainting. This is useful when the masked area is small while the image is large
-                and contain information inreleant for inpainging, such as background.
+                The size of margin in the crop to be applied to the image and masking. If `None`, no crop is applied to
+                image and mask_image. If `padding_mask_crop` is not `None`, it will first find a rectangular region
+                with the same aspect ration of the image and contains all masked area, and then expand that area based
+                on `padding_mask_crop`. The image and mask_image will then be cropped based on the expanded area before
+                resizing to the original image size for inpainting. This is useful when the masked area is small while
+                the image is large and contain information irrelevant for inpainting, such as background.
             strength (`float`, *optional*, defaults to 1.0):
                 Indicates extent to transform the reference `image`. Must be between 0 and 1. `image` is used as a
                 starting point and more noise is added the higher the `strength`. The number of denoising steps depends
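The `padding_mask_crop` wording cleaned up above is easier to follow with a concrete call. A hedged sketch, assuming the usual inpainting checkpoint id; the stand-in images and the 32-pixel margin are illustrative.

import torch
from PIL import Image, ImageDraw
from diffusers import StableDiffusionInpaintPipeline

pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16
).to("cuda")

# Stand-in inputs: a large image with a small masked region in one corner.
image = Image.new("RGB", (1024, 1024), "white")
mask = Image.new("L", (1024, 1024), 0)
ImageDraw.Draw(mask).rectangle([64, 64, 256, 256], fill=255)

# With padding_mask_crop=32, the pipeline finds a rectangle (in the image's
# aspect ratio) containing the mask, expands it by the margin, inpaints only
# that crop, and pastes the result back, rather than denoising the whole
# mostly-irrelevant canvas.
out = pipe(
    prompt="a bronze door knocker",
    image=image,
    mask_image=mask,
    padding_mask_crop=32,
).images[0]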
@@ -1038,6 +1065,10 @@ class StableDiffusionInpaintPipeline(
                 Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
                 in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
                 passed will be used. Must be in descending order.
+            sigmas (`List[float]`, *optional*):
+                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
+                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
+                will be used.
             guidance_scale (`float`, *optional*, defaults to 7.5):
                 A higher guidance scale value encourages the model to generate images closely linked to the text
                 `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
@@ -1052,22 +1083,22 @@ class StableDiffusionInpaintPipeline(
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor is generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
-            ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
-                Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
-                if `do_classifier_free_guidance` is set to `True`.
-                If not provided, embeddings are computed from the `ip_adapter_image` input argument.
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
+                contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
@@ -1079,11 +1110,11 @@ class StableDiffusionInpaintPipeline(
             clip_skip (`int`, *optional*):
                 Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                 the output of the pre-final layer will be used for computing the prompt embeddings.
-            callback_on_step_end (`Callable`, *optional*):
-                A function that calls at the end of each denoising steps during the inference. The function is called
-                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
-                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
-                `callback_on_step_end_tensor_inputs`.
+            callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
+                A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
+                each denoising step during the inference. with the following arguments: `callback_on_step_end(self:
+                DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
+                list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
@@ -1143,6 +1174,9 @@ class StableDiffusionInpaintPipeline(
             "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
         )
 
+        if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
+            callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
+
         # 0. Default height and width to unet
         height = height or self.unet.config.sample_size * self.vae_scale_factor
         width = width or self.unet.config.sample_size * self.vae_scale_factor
@@ -1212,7 +1246,9 @@ class StableDiffusionInpaintPipeline(
         )
 
         # 4. set timesteps
-        timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
+        timesteps, num_inference_steps = retrieve_timesteps(
+            self.scheduler, num_inference_steps, device, timesteps, sigmas
+        )
         timesteps, num_inference_steps = self.get_timesteps(
             num_inference_steps=num_inference_steps, strength=strength, device=device
         )
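Taken together, the inpainting changes compose as below: a custom sigma schedule threads through `retrieve_timesteps` while a function-style callback observes the latents each step. A sketch under the same assumptions as the earlier examples; the model id, schedule values, and logging are illustrative.

import torch
from PIL import Image, ImageDraw
from diffusers import EulerDiscreteScheduler, StableDiffusionInpaintPipeline

pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)

image = Image.new("RGB", (512, 512), "gray")  # stand-ins; use real inputs
mask = Image.new("L", (512, 512), 0)
ImageDraw.Draw(mask).ellipse([128, 128, 384, 384], fill=255)

def watch_latents(pipe, step_index, timestep, callback_kwargs):
    # Receives `latents` because it is listed in callback_on_step_end_tensor_inputs.
    print(f"step {step_index} (t={timestep}): latent std {callback_kwargs['latents'].std():.3f}")
    return callback_kwargs

result = pipe(
    prompt="a stone fountain in a courtyard",
    image=image,
    mask_image=mask,
    sigmas=[14.6, 6.5, 3.9, 2.7, 1.9, 1.4, 1.0, 0.65, 0.4, 0.15, 0.0],  # illustrative
    callback_on_step_end=watch_latents,
    callback_on_step_end_tensor_inputs=["latents"],
).images[0]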