diffusers 0.27.2__py3-none-any.whl → 0.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. diffusers/__init__.py +18 -1
  2. diffusers/callbacks.py +156 -0
  3. diffusers/commands/env.py +110 -6
  4. diffusers/configuration_utils.py +16 -11
  5. diffusers/dependency_versions_table.py +2 -1
  6. diffusers/image_processor.py +158 -45
  7. diffusers/loaders/__init__.py +2 -5
  8. diffusers/loaders/autoencoder.py +4 -4
  9. diffusers/loaders/controlnet.py +4 -4
  10. diffusers/loaders/ip_adapter.py +80 -22
  11. diffusers/loaders/lora.py +134 -20
  12. diffusers/loaders/lora_conversion_utils.py +46 -43
  13. diffusers/loaders/peft.py +4 -3
  14. diffusers/loaders/single_file.py +401 -170
  15. diffusers/loaders/single_file_model.py +290 -0
  16. diffusers/loaders/single_file_utils.py +616 -672
  17. diffusers/loaders/textual_inversion.py +41 -20
  18. diffusers/loaders/unet.py +168 -115
  19. diffusers/loaders/unet_loader_utils.py +163 -0
  20. diffusers/models/__init__.py +2 -0
  21. diffusers/models/activations.py +11 -3
  22. diffusers/models/attention.py +10 -11
  23. diffusers/models/attention_processor.py +367 -148
  24. diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
  25. diffusers/models/autoencoders/autoencoder_kl.py +18 -19
  26. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
  27. diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
  28. diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
  29. diffusers/models/autoencoders/vae.py +23 -24
  30. diffusers/models/controlnet.py +12 -9
  31. diffusers/models/controlnet_flax.py +4 -4
  32. diffusers/models/controlnet_xs.py +1915 -0
  33. diffusers/models/downsampling.py +17 -18
  34. diffusers/models/embeddings.py +147 -24
  35. diffusers/models/model_loading_utils.py +149 -0
  36. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  37. diffusers/models/modeling_flax_utils.py +4 -4
  38. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  39. diffusers/models/modeling_utils.py +118 -98
  40. diffusers/models/resnet.py +18 -23
  41. diffusers/models/transformer_temporal.py +3 -3
  42. diffusers/models/transformers/dual_transformer_2d.py +4 -4
  43. diffusers/models/transformers/prior_transformer.py +7 -7
  44. diffusers/models/transformers/t5_film_transformer.py +17 -19
  45. diffusers/models/transformers/transformer_2d.py +272 -156
  46. diffusers/models/transformers/transformer_temporal.py +10 -10
  47. diffusers/models/unets/unet_1d.py +5 -5
  48. diffusers/models/unets/unet_1d_blocks.py +29 -29
  49. diffusers/models/unets/unet_2d.py +6 -6
  50. diffusers/models/unets/unet_2d_blocks.py +137 -128
  51. diffusers/models/unets/unet_2d_condition.py +19 -15
  52. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  53. diffusers/models/unets/unet_3d_blocks.py +79 -77
  54. diffusers/models/unets/unet_3d_condition.py +13 -9
  55. diffusers/models/unets/unet_i2vgen_xl.py +14 -13
  56. diffusers/models/unets/unet_kandinsky3.py +1 -1
  57. diffusers/models/unets/unet_motion_model.py +114 -14
  58. diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
  59. diffusers/models/unets/unet_stable_cascade.py +16 -13
  60. diffusers/models/upsampling.py +17 -20
  61. diffusers/models/vq_model.py +16 -15
  62. diffusers/pipelines/__init__.py +25 -3
  63. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  64. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  65. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  66. diffusers/pipelines/animatediff/__init__.py +2 -0
  67. diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
  68. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
  69. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
  70. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  71. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  72. diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
  73. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
  74. diffusers/pipelines/auto_pipeline.py +21 -17
  75. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  76. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
  77. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  78. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  79. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
  80. diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
  81. diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
  82. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  83. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
  84. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
  85. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
  86. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
  87. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
  88. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  89. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
  90. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
  91. diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
  92. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
  93. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
  94. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
  95. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
  96. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
  97. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  98. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
  99. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
  100. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  101. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  102. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
  103. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -18
  104. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
  105. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
  106. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
  107. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
  108. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  109. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  110. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  111. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  112. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  113. diffusers/pipelines/dit/pipeline_dit.py +3 -0
  114. diffusers/pipelines/free_init_utils.py +39 -38
  115. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  116. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  117. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
  118. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  119. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  120. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  121. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  122. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
  123. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  124. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  125. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  126. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  127. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  128. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  129. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  130. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
  131. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
  132. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
  133. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
  134. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
  135. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
  136. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
  137. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  138. diffusers/pipelines/marigold/__init__.py +50 -0
  139. diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
  140. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  141. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  142. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  143. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  144. diffusers/pipelines/pia/pipeline_pia.py +39 -125
  145. diffusers/pipelines/pipeline_flax_utils.py +4 -4
  146. diffusers/pipelines/pipeline_loading_utils.py +268 -23
  147. diffusers/pipelines/pipeline_utils.py +266 -37
  148. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  149. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
  150. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
  151. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
  152. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  153. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  154. diffusers/pipelines/shap_e/renderer.py +1 -1
  155. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +18 -18
  156. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  157. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
  158. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  159. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
  160. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  161. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  162. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
  163. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
  164. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  165. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
  166. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
  167. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
  168. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
  169. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
  170. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
  171. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
  172. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  173. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
  174. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -39
  175. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
  176. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
  177. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
  178. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
  179. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
  180. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
  181. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  182. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  183. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
  184. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
  185. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
  186. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
  187. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
  188. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  189. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  190. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
  191. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
  192. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  193. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
  194. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
  195. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
  196. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
  197. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  198. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  199. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  200. diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
  201. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
  202. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  203. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  204. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
  205. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  206. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  207. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
  208. diffusers/schedulers/__init__.py +2 -2
  209. diffusers/schedulers/deprecated/__init__.py +1 -1
  210. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  211. diffusers/schedulers/scheduling_amused.py +5 -5
  212. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  213. diffusers/schedulers/scheduling_consistency_models.py +20 -26
  214. diffusers/schedulers/scheduling_ddim.py +22 -24
  215. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  216. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  217. diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
  218. diffusers/schedulers/scheduling_ddpm.py +20 -22
  219. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  220. diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
  221. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  222. diffusers/schedulers/scheduling_deis_multistep.py +42 -42
  223. diffusers/schedulers/scheduling_dpmsolver_multistep.py +103 -77
  224. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  225. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
  226. diffusers/schedulers/scheduling_dpmsolver_sde.py +23 -23
  227. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +86 -65
  228. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +75 -54
  229. diffusers/schedulers/scheduling_edm_euler.py +50 -31
  230. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +23 -29
  231. diffusers/schedulers/scheduling_euler_discrete.py +160 -68
  232. diffusers/schedulers/scheduling_heun_discrete.py +57 -39
  233. diffusers/schedulers/scheduling_ipndm.py +8 -8
  234. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +19 -19
  235. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +19 -19
  236. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  237. diffusers/schedulers/scheduling_lcm.py +21 -23
  238. diffusers/schedulers/scheduling_lms_discrete.py +24 -26
  239. diffusers/schedulers/scheduling_pndm.py +20 -20
  240. diffusers/schedulers/scheduling_repaint.py +20 -20
  241. diffusers/schedulers/scheduling_sasolver.py +55 -54
  242. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  243. diffusers/schedulers/scheduling_tcd.py +39 -30
  244. diffusers/schedulers/scheduling_unclip.py +15 -15
  245. diffusers/schedulers/scheduling_unipc_multistep.py +111 -41
  246. diffusers/schedulers/scheduling_utils.py +14 -5
  247. diffusers/schedulers/scheduling_utils_flax.py +3 -3
  248. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  249. diffusers/training_utils.py +56 -1
  250. diffusers/utils/__init__.py +7 -0
  251. diffusers/utils/doc_utils.py +1 -0
  252. diffusers/utils/dummy_pt_objects.py +30 -0
  253. diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
  254. diffusers/utils/dynamic_modules_utils.py +24 -11
  255. diffusers/utils/hub_utils.py +3 -2
  256. diffusers/utils/import_utils.py +91 -0
  257. diffusers/utils/loading_utils.py +2 -2
  258. diffusers/utils/logging.py +1 -1
  259. diffusers/utils/peft_utils.py +32 -5
  260. diffusers/utils/state_dict_utils.py +11 -2
  261. diffusers/utils/testing_utils.py +71 -6
  262. diffusers/utils/torch_utils.py +1 -0
  263. diffusers/video_processor.py +113 -0
  264. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/METADATA +47 -47
  265. diffusers-0.28.0.dist-info/RECORD +414 -0
  266. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/WHEEL +1 -1
  267. diffusers-0.27.2.dist-info/RECORD +0 -399
  268. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
  269. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
  270. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -119,6 +119,7 @@ def retrieve_timesteps(
119
119
  num_inference_steps: Optional[int] = None,
120
120
  device: Optional[Union[str, torch.device]] = None,
121
121
  timesteps: Optional[List[int]] = None,
122
+ sigmas: Optional[List[float]] = None,
122
123
  **kwargs,
123
124
  ):
124
125
  """
@@ -129,19 +130,23 @@ def retrieve_timesteps(
129
130
  scheduler (`SchedulerMixin`):
130
131
  The scheduler to get timesteps from.
131
132
  num_inference_steps (`int`):
132
- The number of diffusion steps used when generating samples with a pre-trained model. If used,
133
- `timesteps` must be `None`.
133
+ The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
134
+ must be `None`.
134
135
  device (`str` or `torch.device`, *optional*):
135
136
  The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
136
137
  timesteps (`List[int]`, *optional*):
137
- Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
138
- timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
139
- must be `None`.
138
+ Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
139
+ `num_inference_steps` and `sigmas` must be `None`.
140
+ sigmas (`List[float]`, *optional*):
141
+ Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
142
+ `num_inference_steps` and `timesteps` must be `None`.
140
143
 
141
144
  Returns:
142
145
  `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
143
146
  second element is the number of inference steps.
144
147
  """
148
+ if timesteps is not None and sigmas is not None:
149
+ raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
145
150
  if timesteps is not None:
146
151
  accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
147
152
  if not accepts_timesteps:
@@ -152,6 +157,16 @@ def retrieve_timesteps(
152
157
  scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
153
158
  timesteps = scheduler.timesteps
154
159
  num_inference_steps = len(timesteps)
160
+ elif sigmas is not None:
161
+ accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
162
+ if not accept_sigmas:
163
+ raise ValueError(
164
+ f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
165
+ f" sigmas schedules. Please check whether you are using the correct scheduler."
166
+ )
167
+ scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
168
+ timesteps = scheduler.timesteps
169
+ num_inference_steps = len(timesteps)
155
170
  else:
156
171
  scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
157
172
  timesteps = scheduler.timesteps
@@ -303,8 +318,8 @@ class AltDiffusionImg2ImgPipeline(
303
318
  num_images_per_prompt,
304
319
  do_classifier_free_guidance,
305
320
  negative_prompt=None,
306
- prompt_embeds: Optional[torch.FloatTensor] = None,
307
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
321
+ prompt_embeds: Optional[torch.Tensor] = None,
322
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
308
323
  lora_scale: Optional[float] = None,
309
324
  **kwargs,
310
325
  ):
@@ -335,8 +350,8 @@ class AltDiffusionImg2ImgPipeline(
335
350
  num_images_per_prompt,
336
351
  do_classifier_free_guidance,
337
352
  negative_prompt=None,
338
- prompt_embeds: Optional[torch.FloatTensor] = None,
339
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
353
+ prompt_embeds: Optional[torch.Tensor] = None,
354
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
340
355
  lora_scale: Optional[float] = None,
341
356
  clip_skip: Optional[int] = None,
342
357
  ):
@@ -356,10 +371,10 @@ class AltDiffusionImg2ImgPipeline(
356
371
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
357
372
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
358
373
  less than `1`).
359
- prompt_embeds (`torch.FloatTensor`, *optional*):
374
+ prompt_embeds (`torch.Tensor`, *optional*):
360
375
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
361
376
  provided, text embeddings will be generated from `prompt` input argument.
362
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
377
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
363
378
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
364
379
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
365
380
  argument.
@@ -706,7 +721,7 @@ class AltDiffusionImg2ImgPipeline(
706
721
  data type of the generated embeddings
707
722
 
708
723
  Returns:
709
- `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
724
+ `torch.Tensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
710
725
  """
711
726
  assert len(w.shape) == 1
712
727
  w = w * 1000.0
@@ -753,13 +768,14 @@ class AltDiffusionImg2ImgPipeline(
753
768
  strength: float = 0.8,
754
769
  num_inference_steps: Optional[int] = 50,
755
770
  timesteps: List[int] = None,
771
+ sigmas: List[float] = None,
756
772
  guidance_scale: Optional[float] = 7.5,
757
773
  negative_prompt: Optional[Union[str, List[str]]] = None,
758
774
  num_images_per_prompt: Optional[int] = 1,
759
775
  eta: Optional[float] = 0.0,
760
776
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
761
- prompt_embeds: Optional[torch.FloatTensor] = None,
762
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
777
+ prompt_embeds: Optional[torch.Tensor] = None,
778
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
763
779
  ip_adapter_image: Optional[PipelineImageInput] = None,
764
780
  output_type: Optional[str] = "pil",
765
781
  return_dict: bool = True,
@@ -775,7 +791,7 @@ class AltDiffusionImg2ImgPipeline(
775
791
  Args:
776
792
  prompt (`str` or `List[str]`, *optional*):
777
793
  The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
778
- image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
794
+ image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
779
795
  `Image`, numpy array or tensor representing an image batch to be used as the starting point. For both
780
796
  numpy array and pytorch tensor, the expected value range is between `[0, 1]` If it's a tensor or a list
781
797
  or tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a
@@ -808,10 +824,10 @@ class AltDiffusionImg2ImgPipeline(
808
824
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
809
825
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
810
826
  generation deterministic.
811
- prompt_embeds (`torch.FloatTensor`, *optional*):
827
+ prompt_embeds (`torch.Tensor`, *optional*):
812
828
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
813
829
  provided, text embeddings are generated from the `prompt` input argument.
814
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
830
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
815
831
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
816
832
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
817
833
  ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
@@ -919,7 +935,9 @@ class AltDiffusionImg2ImgPipeline(
919
935
  image = self.image_processor.preprocess(image)
920
936
 
921
937
  # 5. set timesteps
922
- timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
938
+ timesteps, num_inference_steps = retrieve_timesteps(
939
+ self.scheduler, num_inference_steps, device, timesteps, sigmas
940
+ )
923
941
  timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
924
942
  latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
925
943
 
@@ -112,9 +112,9 @@ class RePaintPipeline(DiffusionPipeline):
112
112
  The call function to the pipeline for generation.
113
113
 
114
114
  Args:
115
- image (`torch.FloatTensor` or `PIL.Image.Image`):
115
+ image (`torch.Tensor` or `PIL.Image.Image`):
116
116
  The original image to inpaint on.
117
- mask_image (`torch.FloatTensor` or `PIL.Image.Image`):
117
+ mask_image (`torch.Tensor` or `PIL.Image.Image`):
118
118
  The mask_image where 0.0 define which part of the original image to inpaint.
119
119
  num_inference_steps (`int`, *optional*, defaults to 1000):
120
120
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
@@ -133,8 +133,8 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
133
133
  generator: Optional[torch.Generator] = None,
134
134
  num_inference_steps: int = 100,
135
135
  return_dict: bool = True,
136
- output_type: str = "numpy",
137
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
136
+ output_type: str = "np",
137
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
138
138
  callback_steps: int = 1,
139
139
  ) -> Union[AudioPipelineOutput, Tuple]:
140
140
  if (callback_steps is None) or (
@@ -157,11 +157,11 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
157
157
  expense of slower inference.
158
158
  return_dict (`bool`, *optional*, defaults to `True`):
159
159
  Whether or not to return a [`~pipelines.AudioPipelineOutput`] instead of a plain tuple.
160
- output_type (`str`, *optional*, defaults to `"numpy"`):
160
+ output_type (`str`, *optional*, defaults to `"np"`):
161
161
  The output format of the generated audio.
162
162
  callback (`Callable`, *optional*):
163
163
  A function that calls every `callback_steps` steps during inference. The function is called with the
164
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
164
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
165
165
  callback_steps (`int`, *optional*, defaults to 1):
166
166
  The frequency at which the `callback` function is called. If not specified, the callback is called at
167
167
  every step.
@@ -249,16 +249,16 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
249
249
 
250
250
  logger.info("Generated segment", i)
251
251
 
252
- if output_type == "numpy" and not is_onnx_available():
252
+ if output_type == "np" and not is_onnx_available():
253
253
  raise ValueError(
254
254
  "Cannot return output in 'np' format if ONNX is not available. Make sure to have ONNX installed or set 'output_type' to 'mel'."
255
255
  )
256
- elif output_type == "numpy" and self.melgan is None:
256
+ elif output_type == "np" and self.melgan is None:
257
257
  raise ValueError(
258
258
  "Cannot return output in 'np' format if melgan component is not defined. Make sure to define `self.melgan` or set 'output_type' to 'mel'."
259
259
  )
260
260
 
261
- if output_type == "numpy":
261
+ if output_type == "np":
262
262
  output = self.melgan(input_features=full_pred_mel.astype(np.float32))
263
263
  else:
264
264
  output = full_pred_mel
@@ -255,8 +255,8 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
255
255
  num_images_per_prompt,
256
256
  do_classifier_free_guidance,
257
257
  negative_prompt=None,
258
- prompt_embeds: Optional[torch.FloatTensor] = None,
259
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
258
+ prompt_embeds: Optional[torch.Tensor] = None,
259
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
260
260
  lora_scale: Optional[float] = None,
261
261
  **kwargs,
262
262
  ):
@@ -288,8 +288,8 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
288
288
  num_images_per_prompt,
289
289
  do_classifier_free_guidance,
290
290
  negative_prompt=None,
291
- prompt_embeds: Optional[torch.FloatTensor] = None,
292
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
291
+ prompt_embeds: Optional[torch.Tensor] = None,
292
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
293
293
  lora_scale: Optional[float] = None,
294
294
  clip_skip: Optional[int] = None,
295
295
  ):
@@ -309,10 +309,10 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
309
309
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
310
310
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
311
311
  less than `1`).
312
- prompt_embeds (`torch.FloatTensor`, *optional*):
312
+ prompt_embeds (`torch.Tensor`, *optional*):
313
313
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
314
314
  provided, text embeddings will be generated from `prompt` input argument.
315
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
315
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
316
316
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
317
317
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
318
318
  argument.
@@ -638,10 +638,10 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
638
638
  num_images_per_prompt: Optional[int] = 1,
639
639
  eta: Optional[float] = 0.1,
640
640
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
641
- prompt_embeds: Optional[torch.FloatTensor] = None,
641
+ prompt_embeds: Optional[torch.Tensor] = None,
642
642
  output_type: Optional[str] = "pil",
643
643
  return_dict: bool = True,
644
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
644
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
645
645
  callback_steps: int = 1,
646
646
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
647
647
  clip_skip: Optional[int] = None,
@@ -652,7 +652,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
652
652
  Args:
653
653
  prompt (`str` or `List[str]`):
654
654
  The prompt or prompts to guide the image generation.
655
- image (`torch.FloatTensor` `np.ndarray`, `PIL.Image.Image`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
655
+ image (`torch.Tensor`, `np.ndarray`, `PIL.Image.Image`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
656
656
  `Image` or tensor representing an image batch to be used as the starting point. Can also accept image
657
657
  latents as `image`, but if passing latents directly it is not encoded again.
658
658
  strength (`float`, *optional*, defaults to 0.8):
@@ -678,10 +678,10 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
678
678
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
679
679
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
680
680
  generation deterministic.
681
- prompt_embeds (`torch.FloatTensor`, *optional*):
681
+ prompt_embeds (`torch.Tensor`, *optional*):
682
682
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
683
683
  provided, text embeddings are generated from the `prompt` input argument.
684
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
684
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
685
685
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
686
686
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
687
687
  output_type (`str`, *optional*, defaults to `"pil"`):
@@ -691,7 +691,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
691
691
  plain tuple.
692
692
  callback (`Callable`, *optional*):
693
693
  A function that is called every `callback_steps` steps during inference. The function is called with the
694
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
694
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
695
695
  callback_steps (`int`, *optional*, defaults to 1):
696
696
  The frequency at which the `callback` function is called. If not specified, the callback is called at
697
697
  every step.
@@ -48,7 +48,7 @@ def preprocess_image(image, batch_size):
48
48
 
49
49
 
50
50
  def preprocess_mask(mask, batch_size, scale_factor=8):
51
- if not isinstance(mask, torch.FloatTensor):
51
+ if not isinstance(mask, torch.Tensor):
52
52
  mask = mask.convert("L")
53
53
  w, h = mask.size
54
54
  w, h = (x - x % 8 for x in (w, h)) # resize to integer multiple of 8
@@ -225,8 +225,8 @@ class StableDiffusionInpaintPipelineLegacy(
225
225
  num_images_per_prompt,
226
226
  do_classifier_free_guidance,
227
227
  negative_prompt=None,
228
- prompt_embeds: Optional[torch.FloatTensor] = None,
229
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
228
+ prompt_embeds: Optional[torch.Tensor] = None,
229
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
230
230
  lora_scale: Optional[float] = None,
231
231
  **kwargs,
232
232
  ):
@@ -258,8 +258,8 @@ class StableDiffusionInpaintPipelineLegacy(
258
258
  num_images_per_prompt,
259
259
  do_classifier_free_guidance,
260
260
  negative_prompt=None,
261
- prompt_embeds: Optional[torch.FloatTensor] = None,
262
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
261
+ prompt_embeds: Optional[torch.Tensor] = None,
262
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
263
263
  lora_scale: Optional[float] = None,
264
264
  clip_skip: Optional[int] = None,
265
265
  ):
@@ -279,10 +279,10 @@ class StableDiffusionInpaintPipelineLegacy(
279
279
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
280
280
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
281
281
  less than `1`).
282
- prompt_embeds (`torch.FloatTensor`, *optional*):
282
+ prompt_embeds (`torch.Tensor`, *optional*):
283
283
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
284
284
  provided, text embeddings will be generated from `prompt` input argument.
285
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
285
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
286
286
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
287
287
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
288
288
  argument.
@@ -557,8 +557,8 @@ class StableDiffusionInpaintPipelineLegacy(
557
557
  def __call__(
558
558
  self,
559
559
  prompt: Union[str, List[str]] = None,
560
- image: Union[torch.FloatTensor, PIL.Image.Image] = None,
561
- mask_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
560
+ image: Union[torch.Tensor, PIL.Image.Image] = None,
561
+ mask_image: Union[torch.Tensor, PIL.Image.Image] = None,
562
562
  strength: float = 0.8,
563
563
  num_inference_steps: Optional[int] = 50,
564
564
  guidance_scale: Optional[float] = 7.5,
@@ -567,11 +567,11 @@ class StableDiffusionInpaintPipelineLegacy(
567
567
  add_predicted_noise: Optional[bool] = False,
568
568
  eta: Optional[float] = 0.0,
569
569
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
570
- prompt_embeds: Optional[torch.FloatTensor] = None,
571
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
570
+ prompt_embeds: Optional[torch.Tensor] = None,
571
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
572
572
  output_type: Optional[str] = "pil",
573
573
  return_dict: bool = True,
574
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
574
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
575
575
  callback_steps: int = 1,
576
576
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
577
577
  clip_skip: Optional[int] = None,
@@ -583,10 +583,10 @@ class StableDiffusionInpaintPipelineLegacy(
583
583
  prompt (`str` or `List[str]`, *optional*):
584
584
  The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
585
585
  instead.
586
- image (`torch.FloatTensor` or `PIL.Image.Image`):
586
+ image (`torch.Tensor` or `PIL.Image.Image`):
587
587
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
588
588
  process. This is the image whose masked region will be inpainted.
589
- mask_image (`torch.FloatTensor` or `PIL.Image.Image`):
589
+ mask_image (`torch.Tensor` or `PIL.Image.Image`):
590
590
  `Image`, or tensor representing an image batch, to mask `image`. White pixels in the mask will be
591
591
  replaced by noise and therefore repainted, while black pixels will be preserved. If `mask_image` is a
592
592
  PIL image, it will be converted to a single channel (luminance) before use. If mask is a tensor, the
@@ -620,10 +620,10 @@ class StableDiffusionInpaintPipelineLegacy(
620
620
  generator (`torch.Generator`, *optional*):
621
621
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
622
622
  to make generation deterministic.
623
- prompt_embeds (`torch.FloatTensor`, *optional*):
623
+ prompt_embeds (`torch.Tensor`, *optional*):
624
624
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
625
625
  provided, text embeddings will be generated from `prompt` input argument.
626
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
626
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
627
627
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
628
628
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
629
629
  argument.
@@ -635,7 +635,7 @@ class StableDiffusionInpaintPipelineLegacy(
635
635
  plain tuple.
636
636
  callback (`Callable`, *optional*):
637
637
  A function that will be called every `callback_steps` steps during inference. The function will be
638
- called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
638
+ called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
639
639
  callback_steps (`int`, *optional*, defaults to 1):
640
640
  The frequency at which the `callback` function will be called. If not specified, the callback will be
641
641
  called at every step.
@@ -693,7 +693,7 @@ class StableDiffusionInpaintPipelineLegacy(
693
693
  prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
694
694
 
695
695
  # 4. Preprocess image and mask
696
- if not isinstance(image, torch.FloatTensor):
696
+ if not isinstance(image, torch.Tensor):
697
697
  image = preprocess_image(image, batch_size)
698
698
 
699
699
  mask_image = preprocess_mask(mask_image, batch_size, self.vae_scale_factor)
@@ -163,8 +163,8 @@ class StableDiffusionModelEditingPipeline(
163
163
  num_images_per_prompt,
164
164
  do_classifier_free_guidance,
165
165
  negative_prompt=None,
166
- prompt_embeds: Optional[torch.FloatTensor] = None,
167
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
166
+ prompt_embeds: Optional[torch.Tensor] = None,
167
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
168
168
  lora_scale: Optional[float] = None,
169
169
  **kwargs,
170
170
  ):
@@ -196,8 +196,8 @@ class StableDiffusionModelEditingPipeline(
196
196
  num_images_per_prompt,
197
197
  do_classifier_free_guidance,
198
198
  negative_prompt=None,
199
- prompt_embeds: Optional[torch.FloatTensor] = None,
200
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
199
+ prompt_embeds: Optional[torch.Tensor] = None,
200
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
201
201
  lora_scale: Optional[float] = None,
202
202
  clip_skip: Optional[int] = None,
203
203
  ):
@@ -217,10 +217,10 @@ class StableDiffusionModelEditingPipeline(
217
217
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
218
218
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
219
219
  less than `1`).
220
- prompt_embeds (`torch.FloatTensor`, *optional*):
220
+ prompt_embeds (`torch.Tensor`, *optional*):
221
221
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
222
222
  provided, text embeddings will be generated from `prompt` input argument.
223
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
223
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
224
224
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
225
225
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
226
226
  argument.
@@ -469,7 +469,12 @@ class StableDiffusionModelEditingPipeline(
469
469
 
470
470
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
471
471
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
472
- shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
472
+ shape = (
473
+ batch_size,
474
+ num_channels_latents,
475
+ int(height) // self.vae_scale_factor,
476
+ int(width) // self.vae_scale_factor,
477
+ )
473
478
  if isinstance(generator, list) and len(generator) != batch_size:
474
479
  raise ValueError(
475
480
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -615,12 +620,12 @@ class StableDiffusionModelEditingPipeline(
615
620
  num_images_per_prompt: Optional[int] = 1,
616
621
  eta: float = 0.0,
617
622
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
618
- latents: Optional[torch.FloatTensor] = None,
619
- prompt_embeds: Optional[torch.FloatTensor] = None,
620
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
623
+ latents: Optional[torch.Tensor] = None,
624
+ prompt_embeds: Optional[torch.Tensor] = None,
625
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
621
626
  output_type: Optional[str] = "pil",
622
627
  return_dict: bool = True,
623
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
628
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
624
629
  callback_steps: int = 1,
625
630
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
626
631
  clip_skip: Optional[int] = None,
@@ -652,14 +657,14 @@ class StableDiffusionModelEditingPipeline(
652
657
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
653
658
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
654
659
  generation deterministic.
655
- latents (`torch.FloatTensor`, *optional*):
660
+ latents (`torch.Tensor`, *optional*):
656
661
  Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
657
662
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
658
663
  tensor is generated by sampling using the supplied random `generator`.
659
- prompt_embeds (`torch.FloatTensor`, *optional*):
664
+ prompt_embeds (`torch.Tensor`, *optional*):
660
665
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
661
666
  provided, text embeddings are generated from the `prompt` input argument.
662
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
667
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
663
668
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
664
669
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
665
670
  output_type (`str`, *optional*, defaults to `"pil"`):
@@ -669,7 +674,7 @@ class StableDiffusionModelEditingPipeline(
669
674
  plain tuple.
670
675
  callback (`Callable`, *optional*):
671
676
  A function that is called every `callback_steps` steps during inference. The function is called with the
672
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
677
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
673
678
  callback_steps (`int`, *optional*, defaults to 1):
674
679
  The frequency at which the `callback` function is called. If not specified, the callback is called at
675
680
  every step.
@@ -154,8 +154,8 @@ class StableDiffusionParadigmsPipeline(
154
154
  num_images_per_prompt,
155
155
  do_classifier_free_guidance,
156
156
  negative_prompt=None,
157
- prompt_embeds: Optional[torch.FloatTensor] = None,
158
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
157
+ prompt_embeds: Optional[torch.Tensor] = None,
158
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
159
159
  lora_scale: Optional[float] = None,
160
160
  **kwargs,
161
161
  ):
@@ -187,8 +187,8 @@ class StableDiffusionParadigmsPipeline(
187
187
  num_images_per_prompt,
188
188
  do_classifier_free_guidance,
189
189
  negative_prompt=None,
190
- prompt_embeds: Optional[torch.FloatTensor] = None,
191
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
190
+ prompt_embeds: Optional[torch.Tensor] = None,
191
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
192
192
  lora_scale: Optional[float] = None,
193
193
  clip_skip: Optional[int] = None,
194
194
  ):
@@ -208,10 +208,10 @@ class StableDiffusionParadigmsPipeline(
208
208
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
209
209
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
210
210
  less than `1`).
211
- prompt_embeds (`torch.FloatTensor`, *optional*):
211
+ prompt_embeds (`torch.Tensor`, *optional*):
212
212
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
213
213
  provided, text embeddings will be generated from `prompt` input argument.
214
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
214
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
215
215
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
216
216
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
217
217
  argument.
@@ -448,7 +448,12 @@ class StableDiffusionParadigmsPipeline(
448
448
 
449
449
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
450
450
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
451
- shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
451
+ shape = (
452
+ batch_size,
453
+ num_channels_latents,
454
+ int(height) // self.vae_scale_factor,
455
+ int(width) // self.vae_scale_factor,
456
+ )
452
457
  if isinstance(generator, list) and len(generator) != batch_size:
453
458
  raise ValueError(
454
459
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -487,12 +492,12 @@ class StableDiffusionParadigmsPipeline(
487
492
  num_images_per_prompt: Optional[int] = 1,
488
493
  eta: float = 0.0,
489
494
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
490
- latents: Optional[torch.FloatTensor] = None,
491
- prompt_embeds: Optional[torch.FloatTensor] = None,
492
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
495
+ latents: Optional[torch.Tensor] = None,
496
+ prompt_embeds: Optional[torch.Tensor] = None,
497
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
493
498
  output_type: Optional[str] = "pil",
494
499
  return_dict: bool = True,
495
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
500
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
496
501
  callback_steps: int = 1,
497
502
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
498
503
  debug: bool = False,
@@ -532,14 +537,14 @@ class StableDiffusionParadigmsPipeline(
532
537
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
533
538
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
534
539
  generation deterministic.
535
- latents (`torch.FloatTensor`, *optional*):
540
+ latents (`torch.Tensor`, *optional*):
536
541
  Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
537
542
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
538
543
  tensor is generated by sampling using the supplied random `generator`.
539
- prompt_embeds (`torch.FloatTensor`, *optional*):
544
+ prompt_embeds (`torch.Tensor`, *optional*):
540
545
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
541
546
  provided, text embeddings are generated from the `prompt` input argument.
542
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
547
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
543
548
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
544
549
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
545
550
  output_type (`str`, *optional*, defaults to `"pil"`):
@@ -549,7 +554,7 @@ class StableDiffusionParadigmsPipeline(
549
554
  plain tuple.
550
555
  callback (`Callable`, *optional*):
551
556
  A function that is called every `callback_steps` steps during inference. The function is called with the
552
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
557
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
553
558
  callback_steps (`int`, *optional*, defaults to 1):
554
559
  The frequency at which the `callback` function is called. If not specified, the callback is called at
555
560
  every step.