diffusers-0.27.1-py3-none-any.whl → diffusers-0.28.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. diffusers/__init__.py +18 -1
  2. diffusers/callbacks.py +156 -0
  3. diffusers/commands/env.py +110 -6
  4. diffusers/configuration_utils.py +16 -11
  5. diffusers/dependency_versions_table.py +2 -1
  6. diffusers/image_processor.py +158 -45
  7. diffusers/loaders/__init__.py +2 -5
  8. diffusers/loaders/autoencoder.py +4 -4
  9. diffusers/loaders/controlnet.py +4 -4
  10. diffusers/loaders/ip_adapter.py +80 -22
  11. diffusers/loaders/lora.py +134 -20
  12. diffusers/loaders/lora_conversion_utils.py +46 -43
  13. diffusers/loaders/peft.py +4 -3
  14. diffusers/loaders/single_file.py +401 -170
  15. diffusers/loaders/single_file_model.py +290 -0
  16. diffusers/loaders/single_file_utils.py +616 -672
  17. diffusers/loaders/textual_inversion.py +41 -20
  18. diffusers/loaders/unet.py +168 -115
  19. diffusers/loaders/unet_loader_utils.py +163 -0
  20. diffusers/models/__init__.py +2 -0
  21. diffusers/models/activations.py +11 -3
  22. diffusers/models/attention.py +10 -11
  23. diffusers/models/attention_processor.py +367 -148
  24. diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
  25. diffusers/models/autoencoders/autoencoder_kl.py +18 -19
  26. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
  27. diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
  28. diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
  29. diffusers/models/autoencoders/vae.py +23 -24
  30. diffusers/models/controlnet.py +12 -9
  31. diffusers/models/controlnet_flax.py +4 -4
  32. diffusers/models/controlnet_xs.py +1915 -0
  33. diffusers/models/downsampling.py +17 -18
  34. diffusers/models/embeddings.py +147 -24
  35. diffusers/models/model_loading_utils.py +149 -0
  36. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  37. diffusers/models/modeling_flax_utils.py +4 -4
  38. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  39. diffusers/models/modeling_utils.py +118 -98
  40. diffusers/models/resnet.py +18 -23
  41. diffusers/models/transformer_temporal.py +3 -3
  42. diffusers/models/transformers/dual_transformer_2d.py +4 -4
  43. diffusers/models/transformers/prior_transformer.py +7 -7
  44. diffusers/models/transformers/t5_film_transformer.py +17 -19
  45. diffusers/models/transformers/transformer_2d.py +272 -156
  46. diffusers/models/transformers/transformer_temporal.py +10 -10
  47. diffusers/models/unets/unet_1d.py +5 -5
  48. diffusers/models/unets/unet_1d_blocks.py +29 -29
  49. diffusers/models/unets/unet_2d.py +6 -6
  50. diffusers/models/unets/unet_2d_blocks.py +137 -128
  51. diffusers/models/unets/unet_2d_condition.py +20 -15
  52. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  53. diffusers/models/unets/unet_3d_blocks.py +79 -77
  54. diffusers/models/unets/unet_3d_condition.py +13 -9
  55. diffusers/models/unets/unet_i2vgen_xl.py +14 -13
  56. diffusers/models/unets/unet_kandinsky3.py +1 -1
  57. diffusers/models/unets/unet_motion_model.py +114 -14
  58. diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
  59. diffusers/models/unets/unet_stable_cascade.py +16 -13
  60. diffusers/models/upsampling.py +17 -20
  61. diffusers/models/vq_model.py +16 -15
  62. diffusers/pipelines/__init__.py +25 -3
  63. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  64. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  65. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  66. diffusers/pipelines/animatediff/__init__.py +2 -0
  67. diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
  68. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
  69. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
  70. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  71. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  72. diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
  73. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
  74. diffusers/pipelines/auto_pipeline.py +21 -17
  75. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  76. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
  77. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  78. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  79. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
  80. diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
  81. diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
  82. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  83. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
  84. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
  85. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
  86. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
  87. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
  88. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  89. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
  90. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
  91. diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
  92. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
  93. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
  94. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
  95. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
  96. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
  97. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  98. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
  99. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
  100. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  101. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  102. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
  103. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
  104. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
  105. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
  106. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
  107. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
  108. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  109. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  110. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  111. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  112. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  113. diffusers/pipelines/dit/pipeline_dit.py +3 -0
  114. diffusers/pipelines/free_init_utils.py +39 -38
  115. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  116. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  117. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
  118. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  119. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  120. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  121. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  122. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
  123. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  124. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  125. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  126. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  127. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  128. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  129. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  130. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
  131. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
  132. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
  133. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
  134. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
  135. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
  136. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
  137. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  138. diffusers/pipelines/marigold/__init__.py +50 -0
  139. diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
  140. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  141. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  142. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  143. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  144. diffusers/pipelines/pia/pipeline_pia.py +39 -125
  145. diffusers/pipelines/pipeline_flax_utils.py +4 -4
  146. diffusers/pipelines/pipeline_loading_utils.py +268 -23
  147. diffusers/pipelines/pipeline_utils.py +266 -37
  148. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  149. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
  150. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
  151. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
  152. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  153. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  154. diffusers/pipelines/shap_e/renderer.py +1 -1
  155. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
  156. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  157. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
  158. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  159. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
  160. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  161. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  162. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
  163. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
  164. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  165. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
  166. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
  167. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
  168. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
  169. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
  170. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
  171. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
  172. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  173. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
  174. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
  175. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
  176. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
  177. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
  178. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
  179. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
  180. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
  181. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  182. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  183. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
  184. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
  185. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
  186. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
  187. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
  188. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  189. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  190. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
  191. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
  192. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  193. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
  194. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
  195. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
  196. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
  197. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  198. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  199. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  200. diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
  201. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
  202. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  203. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  204. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
  205. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  206. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  207. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
  208. diffusers/schedulers/__init__.py +2 -2
  209. diffusers/schedulers/deprecated/__init__.py +1 -1
  210. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  211. diffusers/schedulers/scheduling_amused.py +5 -5
  212. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  213. diffusers/schedulers/scheduling_consistency_models.py +23 -25
  214. diffusers/schedulers/scheduling_ddim.py +22 -24
  215. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  216. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  217. diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
  218. diffusers/schedulers/scheduling_ddpm.py +20 -22
  219. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  220. diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
  221. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  222. diffusers/schedulers/scheduling_deis_multistep.py +46 -42
  223. diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
  224. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  225. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
  226. diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
  227. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
  228. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
  229. diffusers/schedulers/scheduling_edm_euler.py +53 -30
  230. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
  231. diffusers/schedulers/scheduling_euler_discrete.py +163 -67
  232. diffusers/schedulers/scheduling_heun_discrete.py +60 -38
  233. diffusers/schedulers/scheduling_ipndm.py +8 -8
  234. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
  235. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
  236. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  237. diffusers/schedulers/scheduling_lcm.py +21 -23
  238. diffusers/schedulers/scheduling_lms_discrete.py +27 -25
  239. diffusers/schedulers/scheduling_pndm.py +20 -20
  240. diffusers/schedulers/scheduling_repaint.py +20 -20
  241. diffusers/schedulers/scheduling_sasolver.py +55 -54
  242. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  243. diffusers/schedulers/scheduling_tcd.py +39 -30
  244. diffusers/schedulers/scheduling_unclip.py +15 -15
  245. diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
  246. diffusers/schedulers/scheduling_utils.py +14 -5
  247. diffusers/schedulers/scheduling_utils_flax.py +3 -3
  248. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  249. diffusers/training_utils.py +56 -1
  250. diffusers/utils/__init__.py +7 -0
  251. diffusers/utils/doc_utils.py +1 -0
  252. diffusers/utils/dummy_pt_objects.py +30 -0
  253. diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
  254. diffusers/utils/dynamic_modules_utils.py +24 -11
  255. diffusers/utils/hub_utils.py +3 -2
  256. diffusers/utils/import_utils.py +91 -0
  257. diffusers/utils/loading_utils.py +2 -2
  258. diffusers/utils/logging.py +1 -1
  259. diffusers/utils/peft_utils.py +32 -5
  260. diffusers/utils/state_dict_utils.py +11 -2
  261. diffusers/utils/testing_utils.py +71 -6
  262. diffusers/utils/torch_utils.py +1 -0
  263. diffusers/video_processor.py +113 -0
  264. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
  265. diffusers-0.28.0.dist-info/RECORD +414 -0
  266. diffusers-0.27.1.dist-info/RECORD +0 -399
  267. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
  268. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
  269. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
  270. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -25,6 +25,7 @@ from transformers import (
25
25
  CLIPVisionModelWithProjection,
26
26
  )
27
27
 
28
+ from ...callbacks import MultiPipelineCallbacks, PipelineCallback
28
29
  from ...image_processor import PipelineImageInput, VaeImageProcessor
29
30
  from ...loaders import (
30
31
  FromSingleFileMixin,
@@ -124,6 +125,7 @@ def retrieve_timesteps(
124
125
  num_inference_steps: Optional[int] = None,
125
126
  device: Optional[Union[str, torch.device]] = None,
126
127
  timesteps: Optional[List[int]] = None,
128
+ sigmas: Optional[List[float]] = None,
127
129
  **kwargs,
128
130
  ):
129
131
  """
@@ -134,19 +136,23 @@ def retrieve_timesteps(
134
136
  scheduler (`SchedulerMixin`):
135
137
  The scheduler to get timesteps from.
136
138
  num_inference_steps (`int`):
137
- The number of diffusion steps used when generating samples with a pre-trained model. If used,
138
- `timesteps` must be `None`.
139
+ The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
140
+ must be `None`.
139
141
  device (`str` or `torch.device`, *optional*):
140
142
  The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
141
143
  timesteps (`List[int]`, *optional*):
142
- Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
143
- timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
144
- must be `None`.
144
+ Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
145
+ `num_inference_steps` and `sigmas` must be `None`.
146
+ sigmas (`List[float]`, *optional*):
147
+ Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
148
+ `num_inference_steps` and `timesteps` must be `None`.
145
149
 
146
150
  Returns:
147
151
  `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
148
152
  second element is the number of inference steps.
149
153
  """
154
+ if timesteps is not None and sigmas is not None:
155
+ raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
150
156
  if timesteps is not None:
151
157
  accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
152
158
  if not accepts_timesteps:
@@ -157,6 +163,16 @@ def retrieve_timesteps(
157
163
  scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
158
164
  timesteps = scheduler.timesteps
159
165
  num_inference_steps = len(timesteps)
166
+ elif sigmas is not None:
167
+ accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
168
+ if not accept_sigmas:
169
+ raise ValueError(
170
+ f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
171
+ f" sigmas schedules. Please check whether you are using the correct scheduler."
172
+ )
173
+ scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
174
+ timesteps = scheduler.timesteps
175
+ num_inference_steps = len(timesteps)
160
176
  else:
161
177
  scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
162
178
  timesteps = scheduler.timesteps
@@ -288,10 +304,10 @@ class StableDiffusionXLImg2ImgPipeline(
288
304
  do_classifier_free_guidance: bool = True,
289
305
  negative_prompt: Optional[str] = None,
290
306
  negative_prompt_2: Optional[str] = None,
291
- prompt_embeds: Optional[torch.FloatTensor] = None,
292
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
293
- pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
294
- negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
307
+ prompt_embeds: Optional[torch.Tensor] = None,
308
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
309
+ pooled_prompt_embeds: Optional[torch.Tensor] = None,
310
+ negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
295
311
  lora_scale: Optional[float] = None,
296
312
  clip_skip: Optional[int] = None,
297
313
  ):
@@ -317,17 +333,17 @@ class StableDiffusionXLImg2ImgPipeline(
317
333
  negative_prompt_2 (`str` or `List[str]`, *optional*):
318
334
  The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
319
335
  `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
320
- prompt_embeds (`torch.FloatTensor`, *optional*):
336
+ prompt_embeds (`torch.Tensor`, *optional*):
321
337
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
322
338
  provided, text embeddings will be generated from `prompt` input argument.
323
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
339
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
324
340
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
325
341
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
326
342
  argument.
327
- pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
343
+ pooled_prompt_embeds (`torch.Tensor`, *optional*):
328
344
  Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
329
345
  If not provided, pooled text embeddings will be generated from `prompt` input argument.
330
- negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
346
+ negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
331
347
  Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
332
348
  weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
333
349
  input argument.
@@ -647,7 +663,7 @@ class StableDiffusionXLImg2ImgPipeline(
647
663
  # because `num_inference_steps` might be even given that every timestep
648
664
  # (except the highest one) is duplicated. If `num_inference_steps` is even it would
649
665
  # mean that we cut the timesteps in the middle of the denoising step
650
- # (between 1st and 2nd devirative) which leads to incorrect results. By adding 1
666
+ # (between 1st and 2nd derivative) which leads to incorrect results. By adding 1
651
667
  # we ensure that the denoising process always ends after the 2nd derivate step of the scheduler
652
668
  num_inference_steps = num_inference_steps + 1
653
669
 
@@ -665,6 +681,12 @@ class StableDiffusionXLImg2ImgPipeline(
665
681
  f"`image` has to be of type `torch.Tensor`, `PIL.Image.Image` or list but is {type(image)}"
666
682
  )
667
683
 
684
+ latents_mean = latents_std = None
685
+ if hasattr(self.vae.config, "latents_mean") and self.vae.config.latents_mean is not None:
686
+ latents_mean = torch.tensor(self.vae.config.latents_mean).view(1, 4, 1, 1)
687
+ if hasattr(self.vae.config, "latents_std") and self.vae.config.latents_std is not None:
688
+ latents_std = torch.tensor(self.vae.config.latents_std).view(1, 4, 1, 1)
689
+
668
690
  # Offload text encoder if `enable_model_cpu_offload` was enabled
669
691
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
670
692
  self.text_encoder_2.to("cpu")
@@ -702,7 +724,12 @@ class StableDiffusionXLImg2ImgPipeline(
702
724
  self.vae.to(dtype)
703
725
 
704
726
  init_latents = init_latents.to(dtype)
705
- init_latents = self.vae.config.scaling_factor * init_latents
727
+ if latents_mean is not None and latents_std is not None:
728
+ latents_mean = latents_mean.to(device=self.device, dtype=dtype)
729
+ latents_std = latents_std.to(device=self.device, dtype=dtype)
730
+ init_latents = (init_latents - latents_mean) * self.vae.config.scaling_factor / latents_std
731
+ else:
732
+ init_latents = self.vae.config.scaling_factor * init_latents
706
733
 
707
734
  if batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] == 0:
708
735
  # expand init_latents for batch_size
@@ -874,20 +901,22 @@ class StableDiffusionXLImg2ImgPipeline(
874
901
  self.vae.decoder.mid_block.to(dtype)
875
902
 
876
903
  # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
877
- def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
904
+ def get_guidance_scale_embedding(
905
+ self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
906
+ ) -> torch.Tensor:
878
907
  """
879
908
  See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
880
909
 
881
910
  Args:
882
- timesteps (`torch.Tensor`):
883
- generate embedding vectors at these timesteps
911
+ w (`torch.Tensor`):
912
+ Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
884
913
  embedding_dim (`int`, *optional*, defaults to 512):
885
- dimension of the embeddings to generate
886
- dtype:
887
- data type of the generated embeddings
914
+ Dimension of the embeddings to generate.
915
+ dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
916
+ Data type of the generated embeddings.
888
917
 
889
918
  Returns:
890
- `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
919
+ `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
891
920
  """
892
921
  assert len(w.shape) == 1
893
922
  w = w * 1000.0
@@ -951,6 +980,7 @@ class StableDiffusionXLImg2ImgPipeline(
951
980
  strength: float = 0.3,
952
981
  num_inference_steps: int = 50,
953
982
  timesteps: List[int] = None,
983
+ sigmas: List[float] = None,
954
984
  denoising_start: Optional[float] = None,
955
985
  denoising_end: Optional[float] = None,
956
986
  guidance_scale: float = 5.0,
@@ -959,13 +989,13 @@ class StableDiffusionXLImg2ImgPipeline(
959
989
  num_images_per_prompt: Optional[int] = 1,
960
990
  eta: float = 0.0,
961
991
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
962
- latents: Optional[torch.FloatTensor] = None,
963
- prompt_embeds: Optional[torch.FloatTensor] = None,
964
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
965
- pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
966
- negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
992
+ latents: Optional[torch.Tensor] = None,
993
+ prompt_embeds: Optional[torch.Tensor] = None,
994
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
995
+ pooled_prompt_embeds: Optional[torch.Tensor] = None,
996
+ negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
967
997
  ip_adapter_image: Optional[PipelineImageInput] = None,
968
- ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
998
+ ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
969
999
  output_type: Optional[str] = "pil",
970
1000
  return_dict: bool = True,
971
1001
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -979,7 +1009,9 @@ class StableDiffusionXLImg2ImgPipeline(
979
1009
  aesthetic_score: float = 6.0,
980
1010
  negative_aesthetic_score: float = 2.5,
981
1011
  clip_skip: Optional[int] = None,
982
- callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
1012
+ callback_on_step_end: Optional[
1013
+ Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
1014
+ ] = None,
983
1015
  callback_on_step_end_tensor_inputs: List[str] = ["latents"],
984
1016
  **kwargs,
985
1017
  ):
@@ -993,7 +1025,7 @@ class StableDiffusionXLImg2ImgPipeline(
993
1025
  prompt_2 (`str` or `List[str]`, *optional*):
994
1026
  The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
995
1027
  used in both text-encoders
996
- image (`torch.FloatTensor` or `PIL.Image.Image` or `np.ndarray` or `List[torch.FloatTensor]` or `List[PIL.Image.Image]` or `List[np.ndarray]`):
1028
+ image (`torch.Tensor` or `PIL.Image.Image` or `np.ndarray` or `List[torch.Tensor]` or `List[PIL.Image.Image]` or `List[np.ndarray]`):
997
1029
  The image(s) to modify with the pipeline.
998
1030
  strength (`float`, *optional*, defaults to 0.3):
999
1031
  Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
@@ -1009,6 +1041,10 @@ class StableDiffusionXLImg2ImgPipeline(
1009
1041
  Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
1010
1042
  in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
1011
1043
  passed will be used. Must be in descending order.
1044
+ sigmas (`List[float]`, *optional*):
1045
+ Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
1046
+ their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
1047
+ will be used.
1012
1048
  denoising_start (`float`, *optional*):
1013
1049
  When specified, indicates the fraction (between 0.0 and 1.0) of the total denoising process to be
1014
1050
  bypassed before it is initiated. Consequently, the initial part of the denoising process is skipped and
@@ -1045,30 +1081,30 @@ class StableDiffusionXLImg2ImgPipeline(
1045
1081
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
1046
1082
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
1047
1083
  to make generation deterministic.
1048
- latents (`torch.FloatTensor`, *optional*):
1084
+ latents (`torch.Tensor`, *optional*):
1049
1085
  Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
1050
1086
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
1051
1087
  tensor will be generated by sampling using the supplied random `generator`.
1052
- prompt_embeds (`torch.FloatTensor`, *optional*):
1088
+ prompt_embeds (`torch.Tensor`, *optional*):
1053
1089
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
1054
1090
  provided, text embeddings will be generated from `prompt` input argument.
1055
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
1091
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
1056
1092
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
1057
1093
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
1058
1094
  argument.
1059
- pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
1095
+ pooled_prompt_embeds (`torch.Tensor`, *optional*):
1060
1096
  Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
1061
1097
  If not provided, pooled text embeddings will be generated from `prompt` input argument.
1062
- negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
1098
+ negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
1063
1099
  Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
1064
1100
  weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
1065
1101
  input argument.
1066
1102
  ip_adapter_image (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
1067
- ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
1068
- Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
1069
- Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
1070
- if `do_classifier_free_guidance` is set to `True`.
1071
- If not provided, embeddings are computed from the `ip_adapter_image` input argument.
1103
+ ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
1104
+ Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
1105
+ IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
1106
+ contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
1107
+ provided, embeddings are computed from the `ip_adapter_image` input argument.
1072
1108
  output_type (`str`, *optional*, defaults to `"pil"`):
1073
1109
  The output format of the generated image. Choose between
1074
1110
  [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -1124,11 +1160,11 @@ class StableDiffusionXLImg2ImgPipeline(
1124
1160
  clip_skip (`int`, *optional*):
1125
1161
  Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
1126
1162
  the output of the pre-final layer will be used for computing the prompt embeddings.
1127
- callback_on_step_end (`Callable`, *optional*):
1128
- A function that calls at the end of each denoising steps during the inference. The function is called
1129
- with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
1130
- callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
1131
- `callback_on_step_end_tensor_inputs`.
1163
+ callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
1164
+ A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
1165
+ each denoising step during the inference. The function is called with the following arguments: `callback_on_step_end(self:
1166
+ DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
1167
+ list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
1132
1168
  callback_on_step_end_tensor_inputs (`List`, *optional*):
1133
1169
  The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
1134
1170
  will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
@@ -1158,6 +1194,9 @@ class StableDiffusionXLImg2ImgPipeline(
1158
1194
  "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
1159
1195
  )
1160
1196
 
1197
+ if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
1198
+ callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
1199
+
1161
1200
  # 1. Check inputs. Raise error if not correct
1162
1201
  self.check_inputs(
1163
1202
  prompt,
@@ -1224,7 +1263,9 @@ class StableDiffusionXLImg2ImgPipeline(
1224
1263
  def denoising_value_valid(dnv):
1225
1264
  return isinstance(dnv, float) and 0 < dnv < 1
1226
1265
 
1227
- timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
1266
+ timesteps, num_inference_steps = retrieve_timesteps(
1267
+ self.scheduler, num_inference_steps, device, timesteps, sigmas
1268
+ )
1228
1269
  timesteps, num_inference_steps = self.get_timesteps(
1229
1270
  num_inference_steps,
1230
1271
  strength,
@@ -1234,17 +1275,19 @@ class StableDiffusionXLImg2ImgPipeline(
1234
1275
  latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
1235
1276
 
1236
1277
  add_noise = True if self.denoising_start is None else False
1278
+
1237
1279
  # 6. Prepare latent variables
1238
- latents = self.prepare_latents(
1239
- image,
1240
- latent_timestep,
1241
- batch_size,
1242
- num_images_per_prompt,
1243
- prompt_embeds.dtype,
1244
- device,
1245
- generator,
1246
- add_noise,
1247
- )
1280
+ if latents is None:
1281
+ latents = self.prepare_latents(
1282
+ image,
1283
+ latent_timestep,
1284
+ batch_size,
1285
+ num_images_per_prompt,
1286
+ prompt_embeds.dtype,
1287
+ device,
1288
+ generator,
1289
+ add_noise,
1290
+ )
1248
1291
  # 7. Prepare extra step kwargs.
1249
1292
  extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
1250
1293
 
@@ -1368,7 +1411,12 @@ class StableDiffusionXLImg2ImgPipeline(
1368
1411
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
1369
1412
 
1370
1413
  # compute the previous noisy sample x_t -> x_t-1
1414
+ latents_dtype = latents.dtype
1371
1415
  latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
1416
+ if latents.dtype != latents_dtype:
1417
+ if torch.backends.mps.is_available():
1418
+ # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
1419
+ latents = latents.to(latents_dtype)
1372
1420
 
1373
1421
  if callback_on_step_end is not None:
1374
1422
  callback_kwargs = {}
@@ -1403,6 +1451,10 @@ class StableDiffusionXLImg2ImgPipeline(
1403
1451
  if needs_upcasting:
1404
1452
  self.upcast_vae()
1405
1453
  latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
1454
+ elif latents.dtype != self.vae.dtype:
1455
+ if torch.backends.mps.is_available():
1456
+ # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
1457
+ self.vae = self.vae.to(latents.dtype)
1406
1458
 
1407
1459
  # unscale/denormalize the latents
1408
1460
  # denormalize with the mean and std if available and not None