diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. diffusers/__init__.py +18 -1
  2. diffusers/callbacks.py +156 -0
  3. diffusers/commands/env.py +110 -6
  4. diffusers/configuration_utils.py +16 -11
  5. diffusers/dependency_versions_table.py +2 -1
  6. diffusers/image_processor.py +158 -45
  7. diffusers/loaders/__init__.py +2 -5
  8. diffusers/loaders/autoencoder.py +4 -4
  9. diffusers/loaders/controlnet.py +4 -4
  10. diffusers/loaders/ip_adapter.py +80 -22
  11. diffusers/loaders/lora.py +134 -20
  12. diffusers/loaders/lora_conversion_utils.py +46 -43
  13. diffusers/loaders/peft.py +4 -3
  14. diffusers/loaders/single_file.py +401 -170
  15. diffusers/loaders/single_file_model.py +290 -0
  16. diffusers/loaders/single_file_utils.py +616 -672
  17. diffusers/loaders/textual_inversion.py +41 -20
  18. diffusers/loaders/unet.py +168 -115
  19. diffusers/loaders/unet_loader_utils.py +163 -0
  20. diffusers/models/__init__.py +2 -0
  21. diffusers/models/activations.py +11 -3
  22. diffusers/models/attention.py +10 -11
  23. diffusers/models/attention_processor.py +367 -148
  24. diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
  25. diffusers/models/autoencoders/autoencoder_kl.py +18 -19
  26. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
  27. diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
  28. diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
  29. diffusers/models/autoencoders/vae.py +23 -24
  30. diffusers/models/controlnet.py +12 -9
  31. diffusers/models/controlnet_flax.py +4 -4
  32. diffusers/models/controlnet_xs.py +1915 -0
  33. diffusers/models/downsampling.py +17 -18
  34. diffusers/models/embeddings.py +147 -24
  35. diffusers/models/model_loading_utils.py +149 -0
  36. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  37. diffusers/models/modeling_flax_utils.py +4 -4
  38. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  39. diffusers/models/modeling_utils.py +118 -98
  40. diffusers/models/resnet.py +18 -23
  41. diffusers/models/transformer_temporal.py +3 -3
  42. diffusers/models/transformers/dual_transformer_2d.py +4 -4
  43. diffusers/models/transformers/prior_transformer.py +7 -7
  44. diffusers/models/transformers/t5_film_transformer.py +17 -19
  45. diffusers/models/transformers/transformer_2d.py +272 -156
  46. diffusers/models/transformers/transformer_temporal.py +10 -10
  47. diffusers/models/unets/unet_1d.py +5 -5
  48. diffusers/models/unets/unet_1d_blocks.py +29 -29
  49. diffusers/models/unets/unet_2d.py +6 -6
  50. diffusers/models/unets/unet_2d_blocks.py +137 -128
  51. diffusers/models/unets/unet_2d_condition.py +20 -15
  52. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  53. diffusers/models/unets/unet_3d_blocks.py +79 -77
  54. diffusers/models/unets/unet_3d_condition.py +13 -9
  55. diffusers/models/unets/unet_i2vgen_xl.py +14 -13
  56. diffusers/models/unets/unet_kandinsky3.py +1 -1
  57. diffusers/models/unets/unet_motion_model.py +114 -14
  58. diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
  59. diffusers/models/unets/unet_stable_cascade.py +16 -13
  60. diffusers/models/upsampling.py +17 -20
  61. diffusers/models/vq_model.py +16 -15
  62. diffusers/pipelines/__init__.py +25 -3
  63. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  64. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  65. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  66. diffusers/pipelines/animatediff/__init__.py +2 -0
  67. diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
  68. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
  69. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
  70. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  71. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  72. diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
  73. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
  74. diffusers/pipelines/auto_pipeline.py +21 -17
  75. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  76. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
  77. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  78. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  79. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
  80. diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
  81. diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
  82. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  83. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
  84. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
  85. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
  86. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
  87. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
  88. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  89. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
  90. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
  91. diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
  92. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
  93. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
  94. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
  95. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
  96. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
  97. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  98. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
  99. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
  100. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  101. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  102. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
  103. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
  104. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
  105. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
  106. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
  107. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
  108. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  109. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  110. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  111. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  112. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  113. diffusers/pipelines/dit/pipeline_dit.py +3 -0
  114. diffusers/pipelines/free_init_utils.py +39 -38
  115. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  116. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  117. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
  118. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  119. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  120. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  121. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  122. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
  123. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  124. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  125. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  126. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  127. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  128. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  129. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  130. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
  131. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
  132. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
  133. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
  134. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
  135. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
  136. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
  137. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  138. diffusers/pipelines/marigold/__init__.py +50 -0
  139. diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
  140. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  141. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  142. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  143. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  144. diffusers/pipelines/pia/pipeline_pia.py +39 -125
  145. diffusers/pipelines/pipeline_flax_utils.py +4 -4
  146. diffusers/pipelines/pipeline_loading_utils.py +268 -23
  147. diffusers/pipelines/pipeline_utils.py +266 -37
  148. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  149. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
  150. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
  151. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
  152. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  153. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  154. diffusers/pipelines/shap_e/renderer.py +1 -1
  155. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
  156. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  157. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
  158. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  159. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
  160. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  161. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  162. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
  163. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
  164. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  165. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
  166. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
  167. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
  168. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
  169. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
  170. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
  171. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
  172. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  173. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
  174. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
  175. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
  176. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
  177. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
  178. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
  179. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
  180. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
  181. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  182. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  183. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
  184. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
  185. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
  186. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
  187. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
  188. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  189. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  190. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
  191. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
  192. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  193. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
  194. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
  195. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
  196. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
  197. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  198. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  199. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  200. diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
  201. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
  202. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  203. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  204. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
  205. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  206. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  207. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
  208. diffusers/schedulers/__init__.py +2 -2
  209. diffusers/schedulers/deprecated/__init__.py +1 -1
  210. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  211. diffusers/schedulers/scheduling_amused.py +5 -5
  212. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  213. diffusers/schedulers/scheduling_consistency_models.py +23 -25
  214. diffusers/schedulers/scheduling_ddim.py +22 -24
  215. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  216. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  217. diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
  218. diffusers/schedulers/scheduling_ddpm.py +20 -22
  219. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  220. diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
  221. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  222. diffusers/schedulers/scheduling_deis_multistep.py +46 -42
  223. diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
  224. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  225. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
  226. diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
  227. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
  228. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
  229. diffusers/schedulers/scheduling_edm_euler.py +53 -30
  230. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
  231. diffusers/schedulers/scheduling_euler_discrete.py +163 -67
  232. diffusers/schedulers/scheduling_heun_discrete.py +60 -38
  233. diffusers/schedulers/scheduling_ipndm.py +8 -8
  234. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
  235. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
  236. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  237. diffusers/schedulers/scheduling_lcm.py +21 -23
  238. diffusers/schedulers/scheduling_lms_discrete.py +27 -25
  239. diffusers/schedulers/scheduling_pndm.py +20 -20
  240. diffusers/schedulers/scheduling_repaint.py +20 -20
  241. diffusers/schedulers/scheduling_sasolver.py +55 -54
  242. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  243. diffusers/schedulers/scheduling_tcd.py +39 -30
  244. diffusers/schedulers/scheduling_unclip.py +15 -15
  245. diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
  246. diffusers/schedulers/scheduling_utils.py +14 -5
  247. diffusers/schedulers/scheduling_utils_flax.py +3 -3
  248. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  249. diffusers/training_utils.py +56 -1
  250. diffusers/utils/__init__.py +7 -0
  251. diffusers/utils/doc_utils.py +1 -0
  252. diffusers/utils/dummy_pt_objects.py +30 -0
  253. diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
  254. diffusers/utils/dynamic_modules_utils.py +24 -11
  255. diffusers/utils/hub_utils.py +3 -2
  256. diffusers/utils/import_utils.py +91 -0
  257. diffusers/utils/loading_utils.py +2 -2
  258. diffusers/utils/logging.py +1 -1
  259. diffusers/utils/peft_utils.py +32 -5
  260. diffusers/utils/state_dict_utils.py +11 -2
  261. diffusers/utils/testing_utils.py +71 -6
  262. diffusers/utils/torch_utils.py +1 -0
  263. diffusers/video_processor.py +113 -0
  264. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
  265. diffusers-0.28.0.dist-info/RECORD +414 -0
  266. diffusers-0.27.1.dist-info/RECORD +0 -399
  267. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
  268. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
  269. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
  270. {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
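Among the larger additions are `diffusers/callbacks.py` (reusable step-end callbacks), `diffusers/video_processor.py`, the ControlNet-XS model and pipelines, and the Marigold depth/normals pipelines. As a minimal sketch of the `callback_on_step_end` hook that the new callback classes build on (pipeline and checkpoint chosen only for illustration, not taken from this diff):

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# A step-end callback receives the pipeline, the step index, the timestep,
# and a dict holding the tensors requested below; it returns the (possibly
# modified) dict, which the pipeline reads back.
def log_latents(pipeline, step, timestep, callback_kwargs):
    print(step, int(timestep), callback_kwargs["latents"].shape)
    return callback_kwargs

image = pipe(
    "an astronaut riding a horse",
    callback_on_step_end=log_latents,
    callback_on_step_end_tensor_inputs=["latents"],
).images[0]
```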
@@ -129,7 +129,7 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
  movq ([`VQModel`]):
  MoVQ Decoder to generate the image from the latents.
  prior_prior ([`PriorTransformer`]):
- The canonincal unCLIP prior to approximate the image embedding from the text embedding.
+ The canonical unCLIP prior to approximate the image embedding from the text embedding.
  prior_image_encoder ([`CLIPVisionModelWithProjection`]):
  Frozen image-encoder.
  prior_text_encoder ([`CLIPTextModelWithProjection`]):
@@ -143,6 +143,7 @@ class KandinskyCombinedPipeline(DiffusionPipeline):

  _load_connected_pipes = True
  model_cpu_offload_seq = "text_encoder->unet->movq->prior_prior->prior_image_encoder->prior_text_encoder"
+ _exclude_from_cpu_offload = ["prior_prior"]

  def __init__(
  self,
@@ -225,9 +226,9 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
  prior_guidance_scale: float = 4.0,
  prior_num_inference_steps: int = 25,
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
- latents: Optional[torch.FloatTensor] = None,
+ latents: Optional[torch.Tensor] = None,
  output_type: Optional[str] = "pil",
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
  callback_steps: int = 1,
  return_dict: bool = True,
  ):
@@ -267,7 +268,7 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
  to make generation deterministic.
- latents (`torch.FloatTensor`, *optional*):
+ latents (`torch.Tensor`, *optional*):
  Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
  tensor will ge generated by sampling using the supplied random `generator`.
@@ -276,7 +277,7 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
  (`np.array`) or `"pt"` (`torch.Tensor`).
  callback (`Callable`, *optional*):
  A function that calls every `callback_steps` steps during inference. The function is called with the
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
  callback_steps (`int`, *optional*, defaults to 1):
  The frequency at which the `callback` function is called. If not specified, the callback is called at
  every step.
@@ -346,7 +347,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
  movq ([`VQModel`]):
  MoVQ Decoder to generate the image from the latents.
  prior_prior ([`PriorTransformer`]):
- The canonincal unCLIP prior to approximate the image embedding from the text embedding.
+ The canonical unCLIP prior to approximate the image embedding from the text embedding.
  prior_image_encoder ([`CLIPVisionModelWithProjection`]):
  Frozen image-encoder.
  prior_text_encoder ([`CLIPTextModelWithProjection`]):
@@ -360,6 +361,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):

  _load_connected_pipes = True
  model_cpu_offload_seq = "prior_text_encoder->prior_image_encoder->prior_prior->" "text_encoder->unet->movq"
+ _exclude_from_cpu_offload = ["prior_prior"]

  def __init__(
  self,
@@ -434,7 +436,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
  def __call__(
  self,
  prompt: Union[str, List[str]],
- image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]],
+ image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]],
  negative_prompt: Optional[Union[str, List[str]]] = None,
  num_inference_steps: int = 100,
  guidance_scale: float = 4.0,
@@ -445,9 +447,9 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
  prior_guidance_scale: float = 4.0,
  prior_num_inference_steps: int = 25,
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
- latents: Optional[torch.FloatTensor] = None,
+ latents: Optional[torch.Tensor] = None,
  output_type: Optional[str] = "pil",
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
  callback_steps: int = 1,
  return_dict: bool = True,
  ):
@@ -457,7 +459,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
  Args:
  prompt (`str` or `List[str]`):
  The prompt or prompts to guide the image generation.
- image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
+ image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
  process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
  again.
@@ -497,7 +499,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
  to make generation deterministic.
- latents (`torch.FloatTensor`, *optional*):
+ latents (`torch.Tensor`, *optional*):
  Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
  tensor will ge generated by sampling using the supplied random `generator`.
@@ -506,7 +508,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
  (`np.array`) or `"pt"` (`torch.Tensor`).
  callback (`Callable`, *optional*):
  A function that calls every `callback_steps` steps during inference. The function is called with the
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
  callback_steps (`int`, *optional*, defaults to 1):
  The frequency at which the `callback` function is called. If not specified, the callback is called at
  every step.
@@ -586,7 +588,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
  movq ([`VQModel`]):
  MoVQ Decoder to generate the image from the latents.
  prior_prior ([`PriorTransformer`]):
- The canonincal unCLIP prior to approximate the image embedding from the text embedding.
+ The canonical unCLIP prior to approximate the image embedding from the text embedding.
  prior_image_encoder ([`CLIPVisionModelWithProjection`]):
  Frozen image-encoder.
  prior_text_encoder ([`CLIPTextModelWithProjection`]):
@@ -600,6 +602,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):

  _load_connected_pipes = True
  model_cpu_offload_seq = "prior_text_encoder->prior_image_encoder->prior_prior->text_encoder->unet->movq"
+ _exclude_from_cpu_offload = ["prior_prior"]

  def __init__(
  self,
@@ -674,8 +677,8 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
  def __call__(
  self,
  prompt: Union[str, List[str]],
- image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]],
- mask_image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]],
+ image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]],
+ mask_image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]],
  negative_prompt: Optional[Union[str, List[str]]] = None,
  num_inference_steps: int = 100,
  guidance_scale: float = 4.0,
@@ -685,9 +688,9 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
  prior_guidance_scale: float = 4.0,
  prior_num_inference_steps: int = 25,
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
- latents: Optional[torch.FloatTensor] = None,
+ latents: Optional[torch.Tensor] = None,
  output_type: Optional[str] = "pil",
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
  callback_steps: int = 1,
  return_dict: bool = True,
  ):
@@ -697,7 +700,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
  Args:
  prompt (`str` or `List[str]`):
  The prompt or prompts to guide the image generation.
- image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
+ image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
  process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
  again.
@@ -736,7 +739,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
  to make generation deterministic.
- latents (`torch.FloatTensor`, *optional*):
+ latents (`torch.Tensor`, *optional*):
  Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
  tensor will ge generated by sampling using the supplied random `generator`.
@@ -745,7 +748,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
  (`np.array`) or `"pt"` (`torch.Tensor`).
  callback (`Callable`, *optional*):
  A function that calls every `callback_steps` steps during inference. The function is called with the
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
  callback_steps (`int`, *optional*, defaults to 1):
  The frequency at which the `callback` function is called. If not specified, the callback is called at
  every step.
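All three combined Kandinsky pipelines above gain `_exclude_from_cpu_offload = ["prior_prior"]`, keeping the prior transformer out of the offload hooks while the remaining modules follow `model_cpu_offload_seq`. A hedged sketch of how this surfaces to user code (checkpoint id chosen only for illustration):

```python
import torch
from diffusers import AutoPipelineForText2Image

# Loads KandinskyCombinedPipeline under the hood.
pipe = AutoPipelineForText2Image.from_pretrained(
    "kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16
)

# Modules are shuttled to the GPU one at a time in model_cpu_offload_seq
# order; modules listed in _exclude_from_cpu_offload (here prior_prior)
# are placed on the device without an offload hook.
pipe.enable_model_cpu_offload()

image = pipe("A lion in a galaxy, 4k photo").images[0]
```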
@@ -266,10 +266,10 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
  # add_noise method to overwrite the one in schedule because it use a different beta schedule for adding noise vs sampling
  def add_noise(
  self,
- original_samples: torch.FloatTensor,
- noise: torch.FloatTensor,
+ original_samples: torch.Tensor,
+ noise: torch.Tensor,
  timesteps: torch.IntTensor,
- ) -> torch.FloatTensor:
+ ) -> torch.Tensor:
  betas = torch.linspace(0.0001, 0.02, 1000, dtype=torch.float32)
  alphas = 1.0 - betas
  alphas_cumprod = torch.cumprod(alphas, dim=0)
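The `add_noise` override above implements the standard DDPM forward-noising rule with a fixed linear beta schedule: x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * eps. A self-contained sketch of that computation (standalone code, not the pipeline method itself):

```python
import torch

# Fixed linear beta schedule, as in the hunk above.
betas = torch.linspace(0.0001, 0.02, 1000, dtype=torch.float32)
alphas_cumprod = torch.cumprod(1.0 - betas, dim=0)

def add_noise(original_samples: torch.Tensor, noise: torch.Tensor,
              timesteps: torch.IntTensor) -> torch.Tensor:
    sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
    sqrt_one_minus_alpha_prod = (1.0 - alphas_cumprod[timesteps]) ** 0.5
    # Broadcast the per-sample scalars over channel/spatial dims.
    while sqrt_alpha_prod.dim() < original_samples.dim():
        sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
        sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
    return sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise

x0 = torch.randn(2, 4, 64, 64)  # clean latents
xt = add_noise(x0, torch.randn_like(x0), torch.tensor([10, 500]))
```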
@@ -295,9 +295,9 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
  def __call__(
  self,
  prompt: Union[str, List[str]],
- image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]],
- image_embeds: torch.FloatTensor,
- negative_image_embeds: torch.FloatTensor,
+ image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]],
+ image_embeds: torch.Tensor,
+ negative_image_embeds: torch.Tensor,
  negative_prompt: Optional[Union[str, List[str]]] = None,
  height: int = 512,
  width: int = 512,
@@ -307,7 +307,7 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
  num_images_per_prompt: int = 1,
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
  output_type: Optional[str] = "pil",
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
  callback_steps: int = 1,
  return_dict: bool = True,
  ):
@@ -317,12 +317,12 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
  Args:
  prompt (`str` or `List[str]`):
  The prompt or prompts to guide the image generation.
- image (`torch.FloatTensor`, `PIL.Image.Image`):
+ image (`torch.Tensor`, `PIL.Image.Image`):
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
  process.
- image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`):
+ image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
  The clip image embeddings for text prompt, that will be used to condition the image generation.
- negative_image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`):
+ negative_image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
  The clip image embeddings for negative text prompt, will be used to condition the image generation.
  negative_prompt (`str` or `List[str]`, *optional*):
  The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
@@ -356,7 +356,7 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
  (`np.array`) or `"pt"` (`torch.Tensor`).
  callback (`Callable`, *optional*):
  A function that calls every `callback_steps` steps during inference. The function is called with the
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
  callback_steps (`int`, *optional*, defaults to 1):
  The frequency at which the `callback` function is called. If not specified, the callback is called at
  every step.
@@ -398,10 +398,10 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
  def __call__(
  self,
  prompt: Union[str, List[str]],
- image: Union[torch.FloatTensor, PIL.Image.Image],
- mask_image: Union[torch.FloatTensor, PIL.Image.Image, np.ndarray],
- image_embeds: torch.FloatTensor,
- negative_image_embeds: torch.FloatTensor,
+ image: Union[torch.Tensor, PIL.Image.Image],
+ mask_image: Union[torch.Tensor, PIL.Image.Image, np.ndarray],
+ image_embeds: torch.Tensor,
+ negative_image_embeds: torch.Tensor,
  negative_prompt: Optional[Union[str, List[str]]] = None,
  height: int = 512,
  width: int = 512,
@@ -409,9 +409,9 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
  guidance_scale: float = 4.0,
  num_images_per_prompt: int = 1,
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
- latents: Optional[torch.FloatTensor] = None,
+ latents: Optional[torch.Tensor] = None,
  output_type: Optional[str] = "pil",
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
  callback_steps: int = 1,
  return_dict: bool = True,
  ):
@@ -421,10 +421,10 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
  Args:
  prompt (`str` or `List[str]`):
  The prompt or prompts to guide the image generation.
- image (`torch.FloatTensor`, `PIL.Image.Image` or `np.ndarray`):
+ image (`torch.Tensor`, `PIL.Image.Image` or `np.ndarray`):
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
  process.
- mask_image (`PIL.Image.Image`,`torch.FloatTensor` or `np.ndarray`):
+ mask_image (`PIL.Image.Image`,`torch.Tensor` or `np.ndarray`):
  `Image`, or a tensor representing an image batch, to mask `image`. White pixels in the mask will be
  repainted, while black pixels will be preserved. You can pass a pytorch tensor as mask only if the
  image you passed is a pytorch tensor, and it should contain one color channel (L) instead of 3, so the
@@ -432,9 +432,9 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
  image or numpy array, mask should also be a either PIL image or numpy array. If it is a PIL image, it
  will be converted to a single channel (luminance) before use. If it is a nummpy array, the expected
  shape is `(H, W)`.
- image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`):
+ image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
  The clip image embeddings for text prompt, that will be used to condition the image generation.
- negative_image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`):
+ negative_image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
  The clip image embeddings for negative text prompt, will be used to condition the image generation.
  negative_prompt (`str` or `List[str]`, *optional*):
  The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
@@ -457,7 +457,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
  to make generation deterministic.
- latents (`torch.FloatTensor`, *optional*):
+ latents (`torch.Tensor`, *optional*):
  Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
  tensor will ge generated by sampling using the supplied random `generator`.
@@ -466,7 +466,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
  (`np.array`) or `"pt"` (`torch.Tensor`).
  callback (`Callable`, *optional*):
  A function that calls every `callback_steps` steps during inference. The function is called with the
- following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+ following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
  callback_steps (`int`, *optional*, defaults to 1):
  The frequency at which the `callback` function is called. If not specified, the callback is called at
  every step.
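Per the `mask_image` description above, masks are single-channel with white marking the region to repaint. A small sketch building such a mask from scratch:

```python
import numpy as np
from PIL import Image

# Single-channel (luminance) mask: white (255) pixels are repainted,
# black (0) pixels are preserved.
mask = np.zeros((512, 512), dtype=np.uint8)
mask[128:384, 128:384] = 255  # repaint the central square
mask_image = Image.fromarray(mask, mode="L")
```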
@@ -115,14 +115,14 @@ class KandinskyPriorPipelineOutput(BaseOutput):
  Output class for KandinskyPriorPipeline.

  Args:
- image_embeds (`torch.FloatTensor`)
+ image_embeds (`torch.Tensor`)
  clip image embeddings for text prompt
  negative_image_embeds (`List[PIL.Image.Image]` or `np.ndarray`)
  clip image embeddings for unconditional tokens
  """

- image_embeds: Union[torch.FloatTensor, np.ndarray]
- negative_image_embeds: Union[torch.FloatTensor, np.ndarray]
+ image_embeds: Union[torch.Tensor, np.ndarray]
+ negative_image_embeds: Union[torch.Tensor, np.ndarray]


  class KandinskyPriorPipeline(DiffusionPipeline):
@@ -134,7 +134,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):

  Args:
  prior ([`PriorTransformer`]):
- The canonincal unCLIP prior to approximate the image embedding from the text embedding.
+ The canonical unCLIP prior to approximate the image embedding from the text embedding.
  image_encoder ([`CLIPVisionModelWithProjection`]):
  Frozen image-encoder.
  text_encoder ([`CLIPTextModelWithProjection`]):
@@ -173,12 +173,12 @@ class KandinskyPriorPipeline(DiffusionPipeline):
  @replace_example_docstring(EXAMPLE_INTERPOLATE_DOC_STRING)
  def interpolate(
  self,
- images_and_prompts: List[Union[str, PIL.Image.Image, torch.FloatTensor]],
+ images_and_prompts: List[Union[str, PIL.Image.Image, torch.Tensor]],
  weights: List[float],
  num_images_per_prompt: int = 1,
  num_inference_steps: int = 25,
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
- latents: Optional[torch.FloatTensor] = None,
+ latents: Optional[torch.Tensor] = None,
  negative_prior_prompt: Optional[str] = None,
  negative_prompt: str = "",
  guidance_scale: float = 4.0,
@@ -188,7 +188,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
  Function invoked when using the prior pipeline for interpolation.

  Args:
- images_and_prompts (`List[Union[str, PIL.Image.Image, torch.FloatTensor]]`):
+ images_and_prompts (`List[Union[str, PIL.Image.Image, torch.Tensor]]`):
  list of prompts and images to guide the image generation.
  weights: (`List[float]`):
  list of weights for each condition in `images_and_prompts`
@@ -200,7 +200,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
  to make generation deterministic.
- latents (`torch.FloatTensor`, *optional*):
+ latents (`torch.Tensor`, *optional*):
  Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
  tensor will ge generated by sampling using the supplied random `generator`.
@@ -403,7 +403,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
  num_images_per_prompt: int = 1,
  num_inference_steps: int = 25,
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
- latents: Optional[torch.FloatTensor] = None,
+ latents: Optional[torch.Tensor] = None,
  guidance_scale: float = 4.0,
  output_type: Optional[str] = "pt",
  return_dict: bool = True,
@@ -425,7 +425,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
  to make generation deterministic.
- latents (`torch.FloatTensor`, *optional*):
+ latents (`torch.Tensor`, *optional*):
  Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
  tensor will ge generated by sampling using the supplied random `generator`.
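The `interpolate` method above blends text prompts and images in CLIP embedding space, with `weights` aligned one-to-one to `images_and_prompts`. A hedged usage sketch (checkpoint id and the solid-color stand-in image are illustrative only):

```python
import PIL.Image
import torch
from diffusers import KandinskyPriorPipeline

prior = KandinskyPriorPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
).to("cuda")

reference = PIL.Image.new("RGB", (512, 512), "red")  # stand-in for a real image
out = prior.interpolate(
    images_and_prompts=["a starry night sky", reference],
    weights=[0.5, 0.5],
    num_inference_steps=25,
)
print(out.image_embeds.shape, out.negative_image_embeds.shape)
```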
@@ -123,15 +123,15 @@ class KandinskyV22Pipeline(DiffusionPipeline):
  @replace_example_docstring(EXAMPLE_DOC_STRING)
  def __call__(
  self,
- image_embeds: Union[torch.FloatTensor, List[torch.FloatTensor]],
- negative_image_embeds: Union[torch.FloatTensor, List[torch.FloatTensor]],
+ image_embeds: Union[torch.Tensor, List[torch.Tensor]],
+ negative_image_embeds: Union[torch.Tensor, List[torch.Tensor]],
  height: int = 512,
  width: int = 512,
  num_inference_steps: int = 100,
  guidance_scale: float = 4.0,
  num_images_per_prompt: int = 1,
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
- latents: Optional[torch.FloatTensor] = None,
+ latents: Optional[torch.Tensor] = None,
  output_type: Optional[str] = "pil",
  return_dict: bool = True,
  callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
@@ -142,9 +142,9 @@ class KandinskyV22Pipeline(DiffusionPipeline):
  Function invoked when calling the pipeline for generation.

  Args:
- image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`):
+ image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
  The clip image embeddings for text prompt, that will be used to condition the image generation.
- negative_image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`):
+ negative_image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
  The clip image embeddings for negative text prompt, will be used to condition the image generation.
  height (`int`, *optional*, defaults to 512):
  The height in pixels of the generated image.
@@ -164,7 +164,7 @@ class KandinskyV22Pipeline(DiffusionPipeline):
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
  to make generation deterministic.
- latents (`torch.FloatTensor`, *optional*):
+ latents (`torch.Tensor`, *optional*):
  Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
  tensor will ge generated by sampling using the supplied random `generator`.
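The recurring `torch.FloatTensor` → `torch.Tensor` relaxation throughout these hunks makes the annotations match what callers already pass, e.g. float16 embeddings produced by a half-precision prior. A sketch under that reading (checkpoint ids chosen only for illustration):

```python
import torch
from diffusers import KandinskyV22PriorPipeline, KandinskyV22Pipeline

prior = KandinskyV22PriorPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
).to("cuda")
decoder = KandinskyV22Pipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
).to("cuda")

out = prior("a portrait of an astronaut")   # embeddings come back as float16
image = decoder(
    image_embeds=out.image_embeds,          # torch.Tensor, not torch.FloatTensor
    negative_image_embeds=out.negative_image_embeds,
    height=512,
    width=512,
).images[0]
```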