diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. diffusers/__init__.py +18 -1
  2. diffusers/callbacks.py +156 -0
  3. diffusers/commands/env.py +110 -6
  4. diffusers/configuration_utils.py +16 -11
  5. diffusers/dependency_versions_table.py +2 -1
  6. diffusers/image_processor.py +158 -45
  7. diffusers/loaders/__init__.py +2 -5
  8. diffusers/loaders/autoencoder.py +4 -4
  9. diffusers/loaders/controlnet.py +4 -4
  10. diffusers/loaders/ip_adapter.py +80 -22
  11. diffusers/loaders/lora.py +134 -20
  12. diffusers/loaders/lora_conversion_utils.py +46 -43
  13. diffusers/loaders/peft.py +4 -3
  14. diffusers/loaders/single_file.py +401 -170
  15. diffusers/loaders/single_file_model.py +290 -0
  16. diffusers/loaders/single_file_utils.py +616 -672
  17. diffusers/loaders/textual_inversion.py +41 -20
  18. diffusers/loaders/unet.py +168 -115
  19. diffusers/loaders/unet_loader_utils.py +163 -0
  20. diffusers/models/__init__.py +2 -0
  21. diffusers/models/activations.py +11 -3
  22. diffusers/models/attention.py +10 -11
  23. diffusers/models/attention_processor.py +367 -148
  24. diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
  25. diffusers/models/autoencoders/autoencoder_kl.py +18 -19
  26. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
  27. diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
  28. diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
  29. diffusers/models/autoencoders/vae.py +23 -24
  30. diffusers/models/controlnet.py +12 -9
  31. diffusers/models/controlnet_flax.py +4 -4
  32. diffusers/models/controlnet_xs.py +1915 -0
  33. diffusers/models/downsampling.py +17 -18
  34. diffusers/models/embeddings.py +147 -24
  35. diffusers/models/model_loading_utils.py +149 -0
  36. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  37. diffusers/models/modeling_flax_utils.py +4 -4
  38. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  39. diffusers/models/modeling_utils.py +118 -98
  40. diffusers/models/resnet.py +18 -23
  41. diffusers/models/transformer_temporal.py +3 -3
  42. diffusers/models/transformers/dual_transformer_2d.py +4 -4
  43. diffusers/models/transformers/prior_transformer.py +7 -7
  44. diffusers/models/transformers/t5_film_transformer.py +17 -19
  45. diffusers/models/transformers/transformer_2d.py +272 -156
  46. diffusers/models/transformers/transformer_temporal.py +10 -10
  47. diffusers/models/unets/unet_1d.py +5 -5
  48. diffusers/models/unets/unet_1d_blocks.py +29 -29
  49. diffusers/models/unets/unet_2d.py +6 -6
  50. diffusers/models/unets/unet_2d_blocks.py +137 -128
  51. diffusers/models/unets/unet_2d_condition.py +20 -15
  52. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  53. diffusers/models/unets/unet_3d_blocks.py +79 -77
  54. diffusers/models/unets/unet_3d_condition.py +13 -9
  55. diffusers/models/unets/unet_i2vgen_xl.py +14 -13
  56. diffusers/models/unets/unet_kandinsky3.py +1 -1
  57. diffusers/models/unets/unet_motion_model.py +114 -14
  58. diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
  59. diffusers/models/unets/unet_stable_cascade.py +16 -13
  60. diffusers/models/upsampling.py +17 -20
  61. diffusers/models/vq_model.py +16 -15
  62. diffusers/pipelines/__init__.py +25 -3
  63. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  64. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  65. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  66. diffusers/pipelines/animatediff/__init__.py +2 -0
  67. diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
  68. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
  69. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
  70. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  71. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  72. diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
  73. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
  74. diffusers/pipelines/auto_pipeline.py +21 -17
  75. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  76. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
  77. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  78. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  79. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
  80. diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
  81. diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
  82. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  83. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
  84. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
  85. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
  86. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
  87. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
  88. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  89. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
  90. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
  91. diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
  92. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
  93. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
  94. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
  95. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
  96. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
  97. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  98. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
  99. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
  100. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  101. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  102. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
  103. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
  104. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
  105. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
  106. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
  107. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
  108. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  109. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  110. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  111. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  112. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  113. diffusers/pipelines/dit/pipeline_dit.py +3 -0
  114. diffusers/pipelines/free_init_utils.py +39 -38
  115. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  116. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  117. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
  118. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  119. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  120. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  121. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  122. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
  123. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  124. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  125. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  126. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  127. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  128. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  129. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  130. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
  131. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
  132. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
  133. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
  134. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
  135. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
  136. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
  137. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  138. diffusers/pipelines/marigold/__init__.py +50 -0
  139. diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
  140. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  141. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  142. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  143. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  144. diffusers/pipelines/pia/pipeline_pia.py +39 -125
  145. diffusers/pipelines/pipeline_flax_utils.py +4 -4
  146. diffusers/pipelines/pipeline_loading_utils.py +268 -23
  147. diffusers/pipelines/pipeline_utils.py +266 -37
  148. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  149. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
  150. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
  151. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
  152. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  153. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  154. diffusers/pipelines/shap_e/renderer.py +1 -1
  155. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
  156. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  157. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
  158. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  159. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
  160. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  161. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  162. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
  163. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
  164. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  165. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
  166. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
  167. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
  168. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
  169. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
  170. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
  171. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
  172. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  173. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
  174. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
  175. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
  176. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
  177. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
  178. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
  179. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
  180. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
  181. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  182. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  183. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
  184. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
  185. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
  186. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
  187. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
  188. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  189. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  190. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
  191. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
  192. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  193. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
  194. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
  195. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
  196. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
  197. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  198. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  199. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  200. diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
  201. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
  202. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  203. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  204. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
  205. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  206. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  207. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
  208. diffusers/schedulers/__init__.py +2 -2
  209. diffusers/schedulers/deprecated/__init__.py +1 -1
  210. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  211. diffusers/schedulers/scheduling_amused.py +5 -5
  212. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  213. diffusers/schedulers/scheduling_consistency_models.py +23 -25
  214. diffusers/schedulers/scheduling_ddim.py +22 -24
  215. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  216. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  217. diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
  218. diffusers/schedulers/scheduling_ddpm.py +20 -22
  219. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  220. diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
  221. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  222. diffusers/schedulers/scheduling_deis_multistep.py +46 -42
  223. diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
  224. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  225. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
  226. diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
  227. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
  228. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
  229. diffusers/schedulers/scheduling_edm_euler.py +53 -30
  230. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
  231. diffusers/schedulers/scheduling_euler_discrete.py +163 -67
  232. diffusers/schedulers/scheduling_heun_discrete.py +60 -38
  233. diffusers/schedulers/scheduling_ipndm.py +8 -8
  234. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
  235. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
  236. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  237. diffusers/schedulers/scheduling_lcm.py +21 -23
  238. diffusers/schedulers/scheduling_lms_discrete.py +27 -25
  239. diffusers/schedulers/scheduling_pndm.py +20 -20
  240. diffusers/schedulers/scheduling_repaint.py +20 -20
  241. diffusers/schedulers/scheduling_sasolver.py +55 -54
  242. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  243. diffusers/schedulers/scheduling_tcd.py +39 -30
  244. diffusers/schedulers/scheduling_unclip.py +15 -15
  245. diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
  246. diffusers/schedulers/scheduling_utils.py +14 -5
  247. diffusers/schedulers/scheduling_utils_flax.py +3 -3
  248. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  249. diffusers/training_utils.py +56 -1
  250. diffusers/utils/__init__.py +7 -0
  251. diffusers/utils/doc_utils.py +1 -0
  252. diffusers/utils/dummy_pt_objects.py +30 -0
  253. diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
  254. diffusers/utils/dynamic_modules_utils.py +24 -11
  255. diffusers/utils/hub_utils.py +3 -2
  256. diffusers/utils/import_utils.py +91 -0
  257. diffusers/utils/loading_utils.py +2 -2
  258. diffusers/utils/logging.py +1 -1
  259. diffusers/utils/peft_utils.py +32 -5
  260. diffusers/utils/state_dict_utils.py +11 -2
  261. diffusers/utils/testing_utils.py +71 -6
  262. diffusers/utils/torch_utils.py +1 -0
  263. diffusers/video_processor.py +113 -0
  264. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
  265. diffusers-0.28.0.dist-info/RECORD +414 -0
  266. diffusers-0.27.1.dist-info/RECORD +0 -399
  267. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
  268. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
  269. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
  270. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -197,7 +197,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
197
197
  )
198
198
 
199
199
  # verify batch size of prompt and image are same if image is a list or tensor or numpy array
200
- if isinstance(image, list) or isinstance(image, np.ndarray):
200
+ if isinstance(image, (list, np.ndarray)):
201
201
  if prompt is not None and isinstance(prompt, str):
202
202
  batch_size = 1
203
203
  elif prompt is not None and isinstance(prompt, list):
@@ -395,7 +395,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
395
395
  [`schedulers.DDIMScheduler`], will be ignored for others.
396
396
  generator (`np.random.RandomState`, *optional*):
397
397
  A np.random.RandomState to make generation deterministic.
398
- latents (`torch.FloatTensor`, *optional*):
398
+ latents (`torch.Tensor`, *optional*):
399
399
  Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
400
400
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
401
401
  tensor will ge generated by sampling using the supplied random `generator`.
@@ -469,7 +469,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
469
469
 
470
470
  latents = self.prepare_latents(
471
471
  batch_size * num_images_per_prompt,
472
- self.num_latent_channels,
472
+ self.config.num_latent_channels,
473
473
  height,
474
474
  width,
475
475
  latents_dtype,
@@ -498,12 +498,12 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
498
498
 
499
499
  # 7. Check that sizes of image and latents match
500
500
  num_channels_image = image.shape[1]
501
- if self.num_latent_channels + num_channels_image != self.num_unet_input_channels:
501
+ if self.config.num_latent_channels + num_channels_image != self.config.num_unet_input_channels:
502
502
  raise ValueError(
503
503
  "Incorrect configuration settings! The config of `pipeline.unet` expects"
504
- f" {self.num_unet_input_channels} but received `num_channels_latents`: {self.num_latent_channels} +"
504
+ f" {self.config.num_unet_input_channels} but received `num_channels_latents`: {self.config.num_latent_channels} +"
505
505
  f" `num_channels_image`: {num_channels_image} "
506
- f" = {self.num_latent_channels + num_channels_image}. Please verify the config of"
506
+ f" = {self.config.num_latent_channels + num_channels_image}. Please verify the config of"
507
507
  " `pipeline.unet` or your `image` input."
508
508
  )
509
509
 
@@ -11,7 +11,6 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
15
14
  import inspect
16
15
  from typing import Any, Callable, Dict, List, Optional, Union
17
16
 
@@ -19,6 +18,7 @@ import torch
19
18
  from packaging import version
20
19
  from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
21
20
 
21
+ from ...callbacks import MultiPipelineCallbacks, PipelineCallback
22
22
  from ...configuration_utils import FrozenDict
23
23
  from ...image_processor import PipelineImageInput, VaeImageProcessor
24
24
  from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
@@ -75,6 +75,7 @@ def retrieve_timesteps(
75
75
  num_inference_steps: Optional[int] = None,
76
76
  device: Optional[Union[str, torch.device]] = None,
77
77
  timesteps: Optional[List[int]] = None,
78
+ sigmas: Optional[List[float]] = None,
78
79
  **kwargs,
79
80
  ):
80
81
  """
@@ -85,19 +86,23 @@ def retrieve_timesteps(
85
86
  scheduler (`SchedulerMixin`):
86
87
  The scheduler to get timesteps from.
87
88
  num_inference_steps (`int`):
88
- The number of diffusion steps used when generating samples with a pre-trained model. If used,
89
- `timesteps` must be `None`.
89
+ The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
90
+ must be `None`.
90
91
  device (`str` or `torch.device`, *optional*):
91
92
  The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
92
93
  timesteps (`List[int]`, *optional*):
93
- Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
94
- timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
95
- must be `None`.
94
+ Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
95
+ `num_inference_steps` and `sigmas` must be `None`.
96
+ sigmas (`List[float]`, *optional*):
97
+ Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
98
+ `num_inference_steps` and `timesteps` must be `None`.
96
99
 
97
100
  Returns:
98
101
  `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
99
102
  second element is the number of inference steps.
100
103
  """
104
+ if timesteps is not None and sigmas is not None:
105
+ raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
101
106
  if timesteps is not None:
102
107
  accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
103
108
  if not accepts_timesteps:
@@ -108,6 +113,16 @@ def retrieve_timesteps(
108
113
  scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
109
114
  timesteps = scheduler.timesteps
110
115
  num_inference_steps = len(timesteps)
116
+ elif sigmas is not None:
117
+ accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
118
+ if not accept_sigmas:
119
+ raise ValueError(
120
+ f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
121
+ f" sigmas schedules. Please check whether you are using the correct scheduler."
122
+ )
123
+ scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
124
+ timesteps = scheduler.timesteps
125
+ num_inference_steps = len(timesteps)
111
126
  else:
112
127
  scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
113
128
  timesteps = scheduler.timesteps
@@ -259,8 +274,8 @@ class StableDiffusionPipeline(
259
274
  num_images_per_prompt,
260
275
  do_classifier_free_guidance,
261
276
  negative_prompt=None,
262
- prompt_embeds: Optional[torch.FloatTensor] = None,
263
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
277
+ prompt_embeds: Optional[torch.Tensor] = None,
278
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
264
279
  lora_scale: Optional[float] = None,
265
280
  **kwargs,
266
281
  ):
@@ -291,8 +306,8 @@ class StableDiffusionPipeline(
291
306
  num_images_per_prompt,
292
307
  do_classifier_free_guidance,
293
308
  negative_prompt=None,
294
- prompt_embeds: Optional[torch.FloatTensor] = None,
295
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
309
+ prompt_embeds: Optional[torch.Tensor] = None,
310
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
296
311
  lora_scale: Optional[float] = None,
297
312
  clip_skip: Optional[int] = None,
298
313
  ):
@@ -312,10 +327,10 @@ class StableDiffusionPipeline(
312
327
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
313
328
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
314
329
  less than `1`).
315
- prompt_embeds (`torch.FloatTensor`, *optional*):
330
+ prompt_embeds (`torch.Tensor`, *optional*):
316
331
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
317
332
  provided, text embeddings will be generated from `prompt` input argument.
318
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
333
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
319
334
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
320
335
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
321
336
  argument.
@@ -652,7 +667,12 @@ class StableDiffusionPipeline(
652
667
  )
653
668
 
654
669
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
655
- shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
670
+ shape = (
671
+ batch_size,
672
+ num_channels_latents,
673
+ int(height) // self.vae_scale_factor,
674
+ int(width) // self.vae_scale_factor,
675
+ )
656
676
  if isinstance(generator, list) and len(generator) != batch_size:
657
677
  raise ValueError(
658
678
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -669,20 +689,22 @@ class StableDiffusionPipeline(
669
689
  return latents
670
690
 
671
691
  # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
672
- def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
692
+ def get_guidance_scale_embedding(
693
+ self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
694
+ ) -> torch.Tensor:
673
695
  """
674
696
  See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
675
697
 
676
698
  Args:
677
- timesteps (`torch.Tensor`):
678
- generate embedding vectors at these timesteps
699
+ w (`torch.Tensor`):
700
+ Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
679
701
  embedding_dim (`int`, *optional*, defaults to 512):
680
- dimension of the embeddings to generate
681
- dtype:
682
- data type of the generated embeddings
702
+ Dimension of the embeddings to generate.
703
+ dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
704
+ Data type of the generated embeddings.
683
705
 
684
706
  Returns:
685
- `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
707
+ `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
686
708
  """
687
709
  assert len(w.shape) == 1
688
710
  w = w * 1000.0
@@ -737,22 +759,25 @@ class StableDiffusionPipeline(
737
759
  width: Optional[int] = None,
738
760
  num_inference_steps: int = 50,
739
761
  timesteps: List[int] = None,
762
+ sigmas: List[float] = None,
740
763
  guidance_scale: float = 7.5,
741
764
  negative_prompt: Optional[Union[str, List[str]]] = None,
742
765
  num_images_per_prompt: Optional[int] = 1,
743
766
  eta: float = 0.0,
744
767
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
745
- latents: Optional[torch.FloatTensor] = None,
746
- prompt_embeds: Optional[torch.FloatTensor] = None,
747
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
768
+ latents: Optional[torch.Tensor] = None,
769
+ prompt_embeds: Optional[torch.Tensor] = None,
770
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
748
771
  ip_adapter_image: Optional[PipelineImageInput] = None,
749
- ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
772
+ ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
750
773
  output_type: Optional[str] = "pil",
751
774
  return_dict: bool = True,
752
775
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
753
776
  guidance_rescale: float = 0.0,
754
777
  clip_skip: Optional[int] = None,
755
- callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
778
+ callback_on_step_end: Optional[
779
+ Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
780
+ ] = None,
756
781
  callback_on_step_end_tensor_inputs: List[str] = ["latents"],
757
782
  **kwargs,
758
783
  ):
@@ -773,6 +798,10 @@ class StableDiffusionPipeline(
773
798
  Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
774
799
  in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
775
800
  passed will be used. Must be in descending order.
801
+ sigmas (`List[float]`, *optional*):
802
+ Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
803
+ their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
804
+ will be used.
776
805
  guidance_scale (`float`, *optional*, defaults to 7.5):
777
806
  A higher guidance scale value encourages the model to generate images closely linked to the text
778
807
  `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
@@ -787,22 +816,22 @@ class StableDiffusionPipeline(
787
816
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
788
817
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
789
818
  generation deterministic.
790
- latents (`torch.FloatTensor`, *optional*):
819
+ latents (`torch.Tensor`, *optional*):
791
820
  Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
792
821
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
793
822
  tensor is generated by sampling using the supplied random `generator`.
794
- prompt_embeds (`torch.FloatTensor`, *optional*):
823
+ prompt_embeds (`torch.Tensor`, *optional*):
795
824
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
796
825
  provided, text embeddings are generated from the `prompt` input argument.
797
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
826
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
798
827
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
799
828
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
800
829
  ip_adapter_image (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
801
- ip_adapter_image_embeds (`List[torch.FloatTensor]`, *optional*):
802
- Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters.
803
- Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should contain the negative image embedding
804
- if `do_classifier_free_guidance` is set to `True`.
805
- If not provided, embeddings are computed from the `ip_adapter_image` input argument.
830
+ ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
831
+ Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
832
+ IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
833
+ contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
834
+ provided, embeddings are computed from the `ip_adapter_image` input argument.
806
835
  output_type (`str`, *optional*, defaults to `"pil"`):
807
836
  The output format of the generated image. Choose between `PIL.Image` or `np.array`.
808
837
  return_dict (`bool`, *optional*, defaults to `True`):
@@ -818,11 +847,11 @@ class StableDiffusionPipeline(
818
847
  clip_skip (`int`, *optional*):
819
848
  Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
820
849
  the output of the pre-final layer will be used for computing the prompt embeddings.
821
- callback_on_step_end (`Callable`, *optional*):
822
- A function that calls at the end of each denoising steps during the inference. The function is called
823
- with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
824
- callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
825
- `callback_on_step_end_tensor_inputs`.
850
+ callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
851
+ A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
852
+ each denoising step during the inference, with the following arguments: `callback_on_step_end(self:
853
+ DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
854
+ list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
826
855
  callback_on_step_end_tensor_inputs (`List`, *optional*):
827
856
  The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
828
857
  will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
@@ -854,6 +883,9 @@ class StableDiffusionPipeline(
854
883
  "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
855
884
  )
856
885
 
886
+ if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
887
+ callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
888
+
857
889
  # 0. Default height and width to unet
858
890
  height = height or self.unet.config.sample_size * self.vae_scale_factor
859
891
  width = width or self.unet.config.sample_size * self.vae_scale_factor
@@ -922,7 +954,9 @@ class StableDiffusionPipeline(
922
954
  )
923
955
 
924
956
  # 4. Prepare timesteps
925
- timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
957
+ timesteps, num_inference_steps = retrieve_timesteps(
958
+ self.scheduler, num_inference_steps, device, timesteps, sigmas
959
+ )
926
960
 
927
961
  # 5. Prepare latent variables
928
962
  num_channels_latents = self.unet.config.in_channels
@@ -156,8 +156,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
156
156
  num_images_per_prompt,
157
157
  do_classifier_free_guidance,
158
158
  negative_prompt=None,
159
- prompt_embeds: Optional[torch.FloatTensor] = None,
160
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
159
+ prompt_embeds: Optional[torch.Tensor] = None,
160
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
161
161
  lora_scale: Optional[float] = None,
162
162
  **kwargs,
163
163
  ):
@@ -189,8 +189,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
189
189
  num_images_per_prompt,
190
190
  do_classifier_free_guidance,
191
191
  negative_prompt=None,
192
- prompt_embeds: Optional[torch.FloatTensor] = None,
193
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
192
+ prompt_embeds: Optional[torch.Tensor] = None,
193
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
194
194
  lora_scale: Optional[float] = None,
195
195
  clip_skip: Optional[int] = None,
196
196
  ):
@@ -210,10 +210,10 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
210
210
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
211
211
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
212
212
  less than `1`).
213
- prompt_embeds (`torch.FloatTensor`, *optional*):
213
+ prompt_embeds (`torch.Tensor`, *optional*):
214
214
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
215
215
  provided, text embeddings will be generated from `prompt` input argument.
216
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
216
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
217
217
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
218
218
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
219
219
  argument.
@@ -548,8 +548,15 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
548
548
  pixel_values = pixel_values.to(device=device)
549
549
  # The DPT-Hybrid model uses batch-norm layers which are not compatible with fp16.
550
550
  # So we use `torch.autocast` here for half precision inference.
551
- context_manger = torch.autocast("cuda", dtype=dtype) if device.type == "cuda" else contextlib.nullcontext()
552
- with context_manger:
551
+ if torch.backends.mps.is_available():
552
+ autocast_ctx = contextlib.nullcontext()
553
+ logger.warning(
554
+ "The DPT-Hybrid model uses batch-norm layers which are not compatible with fp16, but autocast is not yet supported on MPS."
555
+ )
556
+ else:
557
+ autocast_ctx = torch.autocast(device.type, dtype=dtype)
558
+
559
+ with autocast_ctx:
553
560
  depth_map = self.depth_estimator(pixel_values).predicted_depth
554
561
  else:
555
562
  depth_map = depth_map.to(device=device, dtype=dtype)
@@ -602,7 +609,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
602
609
  self,
603
610
  prompt: Union[str, List[str]] = None,
604
611
  image: PipelineImageInput = None,
605
- depth_map: Optional[torch.FloatTensor] = None,
612
+ depth_map: Optional[torch.Tensor] = None,
606
613
  strength: float = 0.8,
607
614
  num_inference_steps: Optional[int] = 50,
608
615
  guidance_scale: Optional[float] = 7.5,
@@ -610,8 +617,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
610
617
  num_images_per_prompt: Optional[int] = 1,
611
618
  eta: Optional[float] = 0.0,
612
619
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
613
- prompt_embeds: Optional[torch.FloatTensor] = None,
614
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
620
+ prompt_embeds: Optional[torch.Tensor] = None,
621
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
615
622
  output_type: Optional[str] = "pil",
616
623
  return_dict: bool = True,
617
624
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -626,10 +633,10 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
626
633
  Args:
627
634
  prompt (`str` or `List[str]`, *optional*):
628
635
  The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
629
- image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
636
+ image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
630
637
  `Image` or tensor representing an image batch to be used as the starting point. Can accept image
631
638
  latents as `image` only if `depth_map` is not `None`.
632
- depth_map (`torch.FloatTensor`, *optional*):
639
+ depth_map (`torch.Tensor`, *optional*):
633
640
  Depth prediction to be used as additional conditioning for the image generation process. If not
634
641
  defined, it automatically predicts the depth with `self.depth_estimator`.
635
642
  strength (`float`, *optional*, defaults to 0.8):
@@ -655,10 +662,10 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
655
662
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
656
663
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
657
664
  generation deterministic.
658
- prompt_embeds (`torch.FloatTensor`, *optional*):
665
+ prompt_embeds (`torch.Tensor`, *optional*):
659
666
  Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
660
667
  provided, text embeddings are generated from the `prompt` input argument.
661
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
668
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
662
669
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
663
670
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
664
671
  output_type (`str`, *optional*, defaults to `"pil"`):
@@ -700,8 +707,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
700
707
  >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
701
708
  >>> init_image = Image.open(requests.get(url, stream=True).raw)
702
709
  >>> prompt = "two tigers"
703
- >>> n_propmt = "bad, deformed, ugly, bad anotomy"
704
- >>> image = pipe(prompt=prompt, image=init_image, negative_prompt=n_propmt, strength=0.7).images[0]
710
+ >>> n_prompt = "bad, deformed, ugly, bad anatomy"
711
+ >>> image = pipe(prompt=prompt, image=init_image, negative_prompt=n_prompt, strength=0.7).images[0]
705
712
  ```
706
713
 
707
714
  Returns:
@@ -207,7 +207,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
207
207
  and not isinstance(image, list)
208
208
  ):
209
209
  raise ValueError(
210
- "`image` has to be of type `torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
210
+ "`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
211
211
  f" {type(image)}"
212
212
  )
213
213
 
@@ -224,7 +224,12 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
224
224
 
225
225
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
226
226
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
227
- shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
227
+ shape = (
228
+ batch_size,
229
+ num_channels_latents,
230
+ int(height) // self.vae_scale_factor,
231
+ int(width) // self.vae_scale_factor,
232
+ )
228
233
  if isinstance(generator, list) and len(generator) != batch_size:
229
234
  raise ValueError(
230
235
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -243,7 +248,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
243
248
  @torch.no_grad()
244
249
  def __call__(
245
250
  self,
246
- image: Union[PIL.Image.Image, List[PIL.Image.Image], torch.FloatTensor],
251
+ image: Union[PIL.Image.Image, List[PIL.Image.Image], torch.Tensor],
247
252
  height: Optional[int] = None,
248
253
  width: Optional[int] = None,
249
254
  num_inference_steps: int = 50,
@@ -251,17 +256,17 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
251
256
  num_images_per_prompt: Optional[int] = 1,
252
257
  eta: float = 0.0,
253
258
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
254
- latents: Optional[torch.FloatTensor] = None,
259
+ latents: Optional[torch.Tensor] = None,
255
260
  output_type: Optional[str] = "pil",
256
261
  return_dict: bool = True,
257
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
262
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
258
263
  callback_steps: int = 1,
259
264
  ):
260
265
  r"""
261
266
  The call function to the pipeline for generation.
262
267
 
263
268
  Args:
264
- image (`PIL.Image.Image` or `List[PIL.Image.Image]` or `torch.FloatTensor`):
269
+ image (`PIL.Image.Image` or `List[PIL.Image.Image]` or `torch.Tensor`):
265
270
  Image or images to guide image generation. If you provide a tensor, it needs to be compatible with
266
271
  [`CLIPImageProcessor`](https://huggingface.co/lambdalabs/sd-image-variations-diffusers/blob/main/feature_extractor/preprocessor_config.json).
267
272
  height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
@@ -282,7 +287,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
282
287
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
283
288
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
284
289
  generation deterministic.
285
- latents (`torch.FloatTensor`, *optional*):
290
+ latents (`torch.Tensor`, *optional*):
286
291
  Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
287
292
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
288
293
  tensor is generated by sampling using the supplied random `generator`.
@@ -293,7 +298,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
293
298
  plain tuple.
294
299
  callback (`Callable`, *optional*):
295
300
  A function that calls every `callback_steps` steps during inference. The function is called with the
296
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
301
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
297
302
  callback_steps (`int`, *optional*, defaults to 1):
298
303
  The frequency at which the `callback` function is called. If not specified, the callback is called at
299
304
  every step.