diffusers 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (478)
  1. diffusers/__init__.py +48 -1
  2. diffusers/commands/__init__.py +1 -1
  3. diffusers/commands/diffusers_cli.py +1 -1
  4. diffusers/commands/env.py +1 -1
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/dependency_versions_check.py +1 -1
  7. diffusers/dependency_versions_table.py +1 -1
  8. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  9. diffusers/hooks/faster_cache.py +2 -2
  10. diffusers/hooks/group_offloading.py +128 -29
  11. diffusers/hooks/hooks.py +2 -2
  12. diffusers/hooks/layerwise_casting.py +3 -3
  13. diffusers/hooks/pyramid_attention_broadcast.py +1 -1
  14. diffusers/image_processor.py +7 -2
  15. diffusers/loaders/__init__.py +4 -0
  16. diffusers/loaders/ip_adapter.py +5 -14
  17. diffusers/loaders/lora_base.py +212 -111
  18. diffusers/loaders/lora_conversion_utils.py +275 -34
  19. diffusers/loaders/lora_pipeline.py +1554 -819
  20. diffusers/loaders/peft.py +52 -109
  21. diffusers/loaders/single_file.py +2 -2
  22. diffusers/loaders/single_file_model.py +20 -4
  23. diffusers/loaders/single_file_utils.py +225 -5
  24. diffusers/loaders/textual_inversion.py +3 -2
  25. diffusers/loaders/transformer_flux.py +1 -1
  26. diffusers/loaders/transformer_sd3.py +2 -2
  27. diffusers/loaders/unet.py +2 -16
  28. diffusers/loaders/unet_loader_utils.py +1 -1
  29. diffusers/loaders/utils.py +1 -1
  30. diffusers/models/__init__.py +15 -1
  31. diffusers/models/activations.py +5 -5
  32. diffusers/models/adapter.py +2 -3
  33. diffusers/models/attention.py +4 -4
  34. diffusers/models/attention_flax.py +10 -10
  35. diffusers/models/attention_processor.py +14 -10
  36. diffusers/models/auto_model.py +47 -10
  37. diffusers/models/autoencoders/__init__.py +1 -0
  38. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  39. diffusers/models/autoencoders/autoencoder_dc.py +3 -3
  40. diffusers/models/autoencoders/autoencoder_kl.py +4 -4
  41. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  42. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  43. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
  44. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  45. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  46. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  47. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  48. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  49. diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
  50. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  52. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  53. diffusers/models/autoencoders/vae.py +13 -2
  54. diffusers/models/autoencoders/vq_model.py +2 -2
  55. diffusers/models/cache_utils.py +1 -1
  56. diffusers/models/controlnet.py +1 -1
  57. diffusers/models/controlnet_flux.py +1 -1
  58. diffusers/models/controlnet_sd3.py +1 -1
  59. diffusers/models/controlnet_sparsectrl.py +1 -1
  60. diffusers/models/controlnets/__init__.py +1 -0
  61. diffusers/models/controlnets/controlnet.py +3 -3
  62. diffusers/models/controlnets/controlnet_flax.py +1 -1
  63. diffusers/models/controlnets/controlnet_flux.py +16 -15
  64. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  65. diffusers/models/controlnets/controlnet_sana.py +290 -0
  66. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  67. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  68. diffusers/models/controlnets/controlnet_union.py +1 -1
  69. diffusers/models/controlnets/controlnet_xs.py +7 -7
  70. diffusers/models/controlnets/multicontrolnet.py +4 -5
  71. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  72. diffusers/models/downsampling.py +2 -2
  73. diffusers/models/embeddings.py +10 -12
  74. diffusers/models/embeddings_flax.py +2 -2
  75. diffusers/models/lora.py +3 -3
  76. diffusers/models/modeling_utils.py +44 -14
  77. diffusers/models/normalization.py +4 -4
  78. diffusers/models/resnet.py +2 -2
  79. diffusers/models/resnet_flax.py +1 -1
  80. diffusers/models/transformers/__init__.py +5 -0
  81. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  82. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  83. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  84. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  85. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  86. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  87. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  88. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  89. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  90. diffusers/models/transformers/prior_transformer.py +1 -1
  91. diffusers/models/transformers/sana_transformer.py +8 -3
  92. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  93. diffusers/models/transformers/t5_film_transformer.py +3 -3
  94. diffusers/models/transformers/transformer_2d.py +1 -1
  95. diffusers/models/transformers/transformer_allegro.py +1 -1
  96. diffusers/models/transformers/transformer_chroma.py +742 -0
  97. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  98. diffusers/models/transformers/transformer_cogview4.py +317 -25
  99. diffusers/models/transformers/transformer_cosmos.py +579 -0
  100. diffusers/models/transformers/transformer_flux.py +9 -11
  101. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  102. diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
  103. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  104. diffusers/models/transformers/transformer_ltx.py +2 -2
  105. diffusers/models/transformers/transformer_lumina2.py +1 -1
  106. diffusers/models/transformers/transformer_mochi.py +1 -1
  107. diffusers/models/transformers/transformer_omnigen.py +2 -2
  108. diffusers/models/transformers/transformer_sd3.py +7 -7
  109. diffusers/models/transformers/transformer_temporal.py +1 -1
  110. diffusers/models/transformers/transformer_wan.py +24 -8
  111. diffusers/models/transformers/transformer_wan_vace.py +393 -0
  112. diffusers/models/unets/unet_1d.py +1 -1
  113. diffusers/models/unets/unet_1d_blocks.py +1 -1
  114. diffusers/models/unets/unet_2d.py +1 -1
  115. diffusers/models/unets/unet_2d_blocks.py +1 -1
  116. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  117. diffusers/models/unets/unet_2d_condition.py +2 -2
  118. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  119. diffusers/models/unets/unet_3d_blocks.py +1 -1
  120. diffusers/models/unets/unet_3d_condition.py +3 -3
  121. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  122. diffusers/models/unets/unet_kandinsky3.py +1 -1
  123. diffusers/models/unets/unet_motion_model.py +2 -2
  124. diffusers/models/unets/unet_stable_cascade.py +1 -1
  125. diffusers/models/upsampling.py +2 -2
  126. diffusers/models/vae_flax.py +2 -2
  127. diffusers/models/vq_model.py +1 -1
  128. diffusers/pipelines/__init__.py +37 -6
  129. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  130. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  131. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  132. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  133. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  134. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  135. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  136. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  137. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  138. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  139. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  140. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  141. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
  142. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  143. diffusers/pipelines/auto_pipeline.py +6 -7
  144. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  145. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  146. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  147. diffusers/pipelines/chroma/__init__.py +49 -0
  148. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  149. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  150. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  151. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
  152. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
  153. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
  154. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
  155. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  156. diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
  157. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  158. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  159. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  160. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  161. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  162. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
  163. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  164. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  165. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  166. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  167. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  168. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
  169. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
  170. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
  171. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  172. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  173. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  174. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  175. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  176. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  177. diffusers/pipelines/cosmos/__init__.py +54 -0
  178. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  179. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  180. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  181. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  182. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  183. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  184. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  185. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  186. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  187. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  188. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  189. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  190. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  191. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  192. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  193. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  194. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  195. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  196. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  197. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  198. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  199. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  200. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  201. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  202. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  203. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  204. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
  205. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  206. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  207. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  208. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  209. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  210. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  211. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  212. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  213. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  214. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  215. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  216. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  217. diffusers/pipelines/dit/pipeline_dit.py +1 -1
  218. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  219. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  220. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  221. diffusers/pipelines/flux/modeling_flux.py +1 -1
  222. diffusers/pipelines/flux/pipeline_flux.py +10 -17
  223. diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
  224. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
  225. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
  226. diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
  227. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
  228. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
  229. diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
  230. diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
  231. diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
  232. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  233. diffusers/pipelines/free_init_utils.py +2 -2
  234. diffusers/pipelines/free_noise_utils.py +3 -3
  235. diffusers/pipelines/hidream_image/__init__.py +47 -0
  236. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  237. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  238. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  239. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  240. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
  241. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  242. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  243. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  244. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  245. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  246. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  247. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  248. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  249. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  250. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  251. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  252. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  253. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  254. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  255. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  256. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  257. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  258. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  259. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  260. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  261. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  262. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  263. diffusers/pipelines/kolors/text_encoder.py +3 -3
  264. diffusers/pipelines/kolors/tokenizer.py +1 -1
  265. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  266. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  267. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  268. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  269. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  270. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  271. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  272. diffusers/pipelines/ltx/__init__.py +4 -0
  273. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  274. diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
  275. diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
  276. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
  277. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  278. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  279. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  280. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  281. diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
  282. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  283. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  284. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  285. diffusers/pipelines/onnx_utils.py +15 -2
  286. diffusers/pipelines/pag/pag_utils.py +2 -2
  287. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  288. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  289. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  290. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  291. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  292. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  293. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  294. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  295. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  296. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  297. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  298. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  299. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  300. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  301. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  302. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  303. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  304. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  305. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  306. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  307. diffusers/pipelines/pipeline_flax_utils.py +3 -4
  308. diffusers/pipelines/pipeline_loading_utils.py +89 -13
  309. diffusers/pipelines/pipeline_utils.py +105 -33
  310. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
  311. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
  312. diffusers/pipelines/sana/__init__.py +4 -0
  313. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  314. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  315. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  316. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  317. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  318. diffusers/pipelines/shap_e/camera.py +1 -1
  319. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  320. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  321. diffusers/pipelines/shap_e/renderer.py +3 -3
  322. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  323. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  324. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  325. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  326. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  327. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  328. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  329. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  330. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  331. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  332. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  333. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
  334. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  335. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
  336. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
  337. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
  338. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  339. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  340. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  341. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  342. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  343. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  344. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  345. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  346. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  347. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  348. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  349. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  350. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
  351. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  352. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  353. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  354. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  355. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  356. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  357. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  358. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  359. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  360. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  361. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  362. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  363. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  364. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  365. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  366. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  367. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  368. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  369. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  370. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  371. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  372. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  373. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  374. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  375. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  376. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  377. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  378. diffusers/pipelines/unclip/text_proj.py +2 -2
  379. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  380. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  381. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  382. diffusers/pipelines/visualcloze/__init__.py +52 -0
  383. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  384. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  385. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  386. diffusers/pipelines/wan/__init__.py +2 -0
  387. diffusers/pipelines/wan/pipeline_wan.py +13 -10
  388. diffusers/pipelines/wan/pipeline_wan_i2v.py +38 -18
  389. diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
  390. diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
  391. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  392. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  393. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  394. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  395. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  396. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  397. diffusers/quantizers/__init__.py +179 -1
  398. diffusers/quantizers/base.py +6 -1
  399. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  400. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  401. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  402. diffusers/quantizers/gguf/utils.py +16 -13
  403. diffusers/quantizers/quantization_config.py +18 -16
  404. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  405. diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
  406. diffusers/schedulers/__init__.py +3 -1
  407. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  408. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  409. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  410. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  411. diffusers/schedulers/scheduling_ddim.py +8 -8
  412. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  413. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  414. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  415. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  416. diffusers/schedulers/scheduling_ddpm.py +9 -9
  417. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  418. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  419. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  420. diffusers/schedulers/scheduling_deis_multistep.py +8 -8
  421. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  422. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
  423. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  424. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  425. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  426. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
  427. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  428. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  429. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  430. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  431. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  432. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  433. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  434. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  435. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  436. diffusers/schedulers/scheduling_ipndm.py +2 -2
  437. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  438. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  439. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  440. diffusers/schedulers/scheduling_lcm.py +3 -3
  441. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  442. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  443. diffusers/schedulers/scheduling_pndm.py +4 -4
  444. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  445. diffusers/schedulers/scheduling_repaint.py +9 -9
  446. diffusers/schedulers/scheduling_sasolver.py +15 -15
  447. diffusers/schedulers/scheduling_scm.py +1 -1
  448. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  449. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  450. diffusers/schedulers/scheduling_tcd.py +3 -3
  451. diffusers/schedulers/scheduling_unclip.py +5 -5
  452. diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
  453. diffusers/schedulers/scheduling_utils.py +1 -1
  454. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  455. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  456. diffusers/training_utils.py +13 -5
  457. diffusers/utils/__init__.py +5 -0
  458. diffusers/utils/accelerate_utils.py +1 -1
  459. diffusers/utils/doc_utils.py +1 -1
  460. diffusers/utils/dummy_pt_objects.py +120 -0
  461. diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
  462. diffusers/utils/dynamic_modules_utils.py +21 -3
  463. diffusers/utils/export_utils.py +1 -1
  464. diffusers/utils/import_utils.py +81 -18
  465. diffusers/utils/logging.py +1 -1
  466. diffusers/utils/outputs.py +2 -1
  467. diffusers/utils/peft_utils.py +91 -8
  468. diffusers/utils/state_dict_utils.py +20 -3
  469. diffusers/utils/testing_utils.py +59 -7
  470. diffusers/utils/torch_utils.py +25 -5
  471. diffusers/video_processor.py +2 -2
  472. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/METADATA +70 -55
  473. diffusers-0.34.0.dist-info/RECORD +639 -0
  474. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/WHEEL +1 -1
  475. diffusers-0.33.1.dist-info/RECORD +0 -608
  476. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
  477. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
  478. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -179,7 +179,7 @@ class KandinskyV22CombinedPipeline(DiffusionPipeline):
  def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
  self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)

- def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+ def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
  r"""
  Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
  text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
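The hunk above (and the matching ones for the other combined Kandinsky 2.2 pipelines below) changes the default `device` of the offload helpers from the hard-coded string "cuda" to `None`, which suggests the accelerator is now resolved at call time rather than assumed to be CUDA. A minimal usage sketch under that assumption; the checkpoint id is illustrative and the exact device-resolution logic is not shown in this diff:

    import torch
    from diffusers import AutoPipelineForText2Image

    # Loads the Kandinsky 2.2 combined pipeline (illustrative checkpoint).
    pipe = AutoPipelineForText2Image.from_pretrained(
        "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
    )
    # device=None (the new default) is assumed to let the pipeline pick the available
    # accelerator; passing device="cuda" reproduces the previous hard-coded default.
    pipe.enable_sequential_cpu_offload()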
@@ -242,20 +242,20 @@ class KandinskyV22CombinedPipeline(DiffusionPipeline):
  width (`int`, *optional*, defaults to 512):
  The width in pixels of the generated image.
  prior_guidance_scale (`float`, *optional*, defaults to 4.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  prior_num_inference_steps (`int`, *optional*, defaults to 100):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference.
  guidance_scale (`float`, *optional*, defaults to 4.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
  to make generation deterministic.
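These docstring hunks mainly swap arXiv links for their huggingface.co/papers mirrors while reflowing the text. Since they keep referring to `w` from equation 2 of the Imagen paper, the classifier-free guidance combination they describe can be sketched as follows (tensor names are illustrative, not taken from this diff):

    import torch

    def apply_classifier_free_guidance(
        noise_pred_uncond: torch.Tensor, noise_pred_text: torch.Tensor, guidance_scale: float
    ) -> torch.Tensor:
        # guidance_scale is the `w` in the docstring; w > 1 pushes the prediction toward
        # the text-conditioned branch, usually at some cost in image fidelity.
        return noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)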
@@ -407,7 +407,7 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
  def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
  self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)

- def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
  r"""
  Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
  to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
@@ -417,7 +417,7 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
  self.prior_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
  self.decoder_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)

- def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+ def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
  r"""
  Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
  text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
@@ -479,11 +479,11 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  guidance_scale (`float`, *optional*, defaults to 4.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  strength (`float`, *optional*, defaults to 0.3):
  Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
  will be used as a starting point, adding more noise to it the larger the `strength`. The number of
@@ -498,11 +498,11 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
  width (`int`, *optional*, defaults to 512):
  The width in pixels of the generated image.
  prior_guidance_scale (`float`, *optional*, defaults to 4.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  prior_num_inference_steps (`int`, *optional*, defaults to 100):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference.
@@ -656,7 +656,7 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
  def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
  self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)

- def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+ def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
  r"""
  Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
  text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
@@ -722,11 +722,11 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  guidance_scale (`float`, *optional*, defaults to 4.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  num_inference_steps (`int`, *optional*, defaults to 100):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference.
@@ -735,11 +735,11 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
  width (`int`, *optional*, defaults to 512):
  The width in pixels of the generated image.
  prior_guidance_scale (`float`, *optional*, defaults to 4.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  prior_num_inference_steps (`int`, *optional*, defaults to 100):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference.
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -198,11 +198,11 @@ class KandinskyV22ControlnetPipeline(DiffusionPipeline):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference.
  guidance_scale (`float`, *optional*, defaults to 4.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -14,11 +14,10 @@

  from typing import Callable, List, Optional, Union

- import numpy as np
  import PIL.Image
  import torch
- from PIL import Image

+ from ...image_processor import VaeImageProcessor
  from ...models import UNet2DConditionModel, VQModel
  from ...schedulers import DDPMScheduler
  from ...utils import (
@@ -105,27 +104,6 @@ EXAMPLE_DOC_STRING = """
  """


- # Copied from diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2.downscale_height_and_width
- def downscale_height_and_width(height, width, scale_factor=8):
- new_height = height // scale_factor**2
- if height % scale_factor**2 != 0:
- new_height += 1
- new_width = width // scale_factor**2
- if width % scale_factor**2 != 0:
- new_width += 1
- return new_height * scale_factor, new_width * scale_factor
-
-
- # Copied from diffusers.pipelines.kandinsky.pipeline_kandinsky_img2img.prepare_image
- def prepare_image(pil_image, w=512, h=512):
- pil_image = pil_image.resize((w, h), resample=Image.BICUBIC, reducing_gap=1)
- arr = np.array(pil_image.convert("RGB"))
- arr = arr.astype(np.float32) / 127.5 - 1
- arr = np.transpose(arr, [2, 0, 1])
- image = torch.from_numpy(arr).unsqueeze(0)
- return image
-
-
  class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
  """
  Pipeline for image-to-image generation using Kandinsky
@@ -157,7 +135,14 @@ class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
  scheduler=scheduler,
  movq=movq,
  )
- self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)
+ movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1) if getattr(self, "movq", None) else 8
+ movq_latent_channels = self.movq.config.latent_channels if getattr(self, "movq", None) else 4
+ self.image_processor = VaeImageProcessor(
+ vae_scale_factor=movq_scale_factor,
+ vae_latent_channels=movq_latent_channels,
+ resample="bicubic",
+ reducing_gap=1,
+ )

  # Copied from diffusers.pipelines.kandinsky.pipeline_kandinsky_img2img.KandinskyImg2ImgPipeline.get_timesteps
  def get_timesteps(self, num_inference_steps, strength, device):
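This constructor hunk (and the matching one in KandinskyV22Img2ImgPipeline further down) replaces the module-level `prepare_image` and `downscale_height_and_width` helpers with a `VaeImageProcessor` built from the MoVQ config. A rough sketch of how the new processor covers the old hand-written resize-and-normalize step, assuming a typical MoVQ setup of scale factor 8 and 4 latent channels (values are illustrative, not read from a checkpoint):

    import torch
    from PIL import Image
    from diffusers.image_processor import VaeImageProcessor

    # Mirrors the arguments used in the hunk above.
    processor = VaeImageProcessor(
        vae_scale_factor=8, vae_latent_channels=4, resample="bicubic", reducing_gap=1
    )

    pil_image = Image.new("RGB", (500, 500))  # deliberately not the target size
    # preprocess() resizes with bicubic resampling and rescales pixel values to [-1, 1],
    # which is what the removed prepare_image() helper did by hand.
    tensor = processor.preprocess(pil_image, height=512, width=512)
    print(tensor.shape)  # expected: torch.Size([1, 3, 512, 512])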
@@ -259,11 +244,11 @@ class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference.
  guidance_scale (`float`, *optional*, defaults to 4.0):
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ Guidance scale as defined in [Classifier-Free Diffusion
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+ the text `prompt`, usually at the expense of lower image quality.
  num_images_per_prompt (`int`, *optional*, defaults to 1):
  The number of images to generate per prompt.
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -316,7 +301,7 @@ class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
  f"Input is in incorrect format: {[type(i) for i in image]}. Currently, we only support PIL image and pytorch tensor"
  )

- image = torch.cat([prepare_image(i, width, height) for i in image], dim=0)
+ image = torch.cat([self.image_processor.preprocess(i, width, height) for i in image], dim=0)
  image = image.to(dtype=image_embeds.dtype, device=device)

  latents = self.movq.encode(image)["latents"]
@@ -324,7 +309,6 @@ class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
  self.scheduler.set_timesteps(num_inference_steps, device=device)
  timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
  latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
- height, width = downscale_height_and_width(height, width, self.movq_scale_factor)
  latents = self.prepare_latents(
  latents, latent_timestep, batch_size, num_images_per_prompt, image_embeds.dtype, device, generator
  )
@@ -379,13 +363,7 @@ class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
  if output_type not in ["pt", "np", "pil"]:
  raise ValueError(f"Only the output types `pt`, `pil` and `np` are supported not output_type={output_type}")

- if output_type in ["np", "pil"]:
- image = image * 0.5 + 0.5
- image = image.clamp(0, 1)
- image = image.cpu().permute(0, 2, 3, 1).float().numpy()
-
- if output_type == "pil":
- image = self.numpy_to_pil(image)
+ image = self.image_processor.postprocess(image, output_type)

  if not return_dict:
  return (image,)
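The single `postprocess()` call above folds in the denormalize, clamp, and channels-last conversion that the pipeline previously spelled out inline. A small stand-alone sketch of that equivalence, assuming the processor's default `do_normalize=True` so outputs are mapped back from [-1, 1] to [0, 1] (the decoded tensor is synthetic):

    import torch
    from diffusers.image_processor import VaeImageProcessor

    processor = VaeImageProcessor(vae_scale_factor=8)
    decoded = torch.rand(1, 3, 64, 64) * 2 - 1  # stand-in for a MoVQ decoder output in [-1, 1]

    # Old inline path: rescale to [0, 1], clamp, move channels last, convert to numpy.
    manual = (decoded * 0.5 + 0.5).clamp(0, 1).cpu().permute(0, 2, 3, 1).float().numpy()
    # New path: the image processor performs the same conversion (and handles "pil"/"pt" too).
    processed = processor.postprocess(decoded, output_type="np")
    print(abs(manual - processed).max())  # expected to print 0.0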
@@ -1,4 +1,4 @@
- # Copyright 2024 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -14,11 +14,10 @@

  from typing import Callable, Dict, List, Optional, Union

- import numpy as np
  import PIL.Image
  import torch
- from PIL import Image

+ from ...image_processor import VaeImageProcessor
  from ...models import UNet2DConditionModel, VQModel
  from ...schedulers import DDPMScheduler
  from ...utils import deprecate, is_torch_xla_available, logging
@@ -76,27 +75,6 @@ EXAMPLE_DOC_STRING = """
  """


- # Copied from diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2.downscale_height_and_width
- def downscale_height_and_width(height, width, scale_factor=8):
- new_height = height // scale_factor**2
- if height % scale_factor**2 != 0:
- new_height += 1
- new_width = width // scale_factor**2
- if width % scale_factor**2 != 0:
- new_width += 1
- return new_height * scale_factor, new_width * scale_factor
-
-
- # Copied from diffusers.pipelines.kandinsky.pipeline_kandinsky_img2img.prepare_image
- def prepare_image(pil_image, w=512, h=512):
- pil_image = pil_image.resize((w, h), resample=Image.BICUBIC, reducing_gap=1)
- arr = np.array(pil_image.convert("RGB"))
- arr = arr.astype(np.float32) / 127.5 - 1
- arr = np.transpose(arr, [2, 0, 1])
- image = torch.from_numpy(arr).unsqueeze(0)
- return image
-
-
  class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
  """
  Pipeline for image-to-image generation using Kandinsky
@@ -129,7 +107,14 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
  scheduler=scheduler,
  movq=movq,
  )
- self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)
+ movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1) if getattr(self, "movq", None) else 8
+ movq_latent_channels = self.movq.config.latent_channels if getattr(self, "movq", None) else 4
+ self.image_processor = VaeImageProcessor(
+ vae_scale_factor=movq_scale_factor,
+ vae_latent_channels=movq_latent_channels,
+ resample="bicubic",
+ reducing_gap=1,
+ )

  # Copied from diffusers.pipelines.kandinsky.pipeline_kandinsky_img2img.KandinskyImg2ImgPipeline.get_timesteps
  def get_timesteps(self, num_inference_steps, strength, device):
  def get_timesteps(self, num_inference_steps, strength, device):
@@ -240,11 +225,11 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
240
225
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
241
226
  expense of slower inference.
242
227
  guidance_scale (`float`, *optional*, defaults to 4.0):
243
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
244
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
245
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
246
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
247
- usually at the expense of lower image quality.
228
+ Guidance scale as defined in [Classifier-Free Diffusion
229
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
230
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
231
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
232
+ the text `prompt`, usually at the expense of lower image quality.
248
233
  num_images_per_prompt (`int`, *optional*, defaults to 1):
249
234
  The number of images to generate per prompt.
250
235
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -319,7 +304,7 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
  f"Input is in incorrect format: {[type(i) for i in image]}. Currently, we only support PIL image and pytorch tensor"
  )

- image = torch.cat([prepare_image(i, width, height) for i in image], dim=0)
+ image = torch.cat([self.image_processor.preprocess(i, width, height) for i in image], dim=0)
  image = image.to(dtype=image_embeds.dtype, device=device)

  latents = self.movq.encode(image)["latents"]
@@ -327,7 +312,6 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
  self.scheduler.set_timesteps(num_inference_steps, device=device)
  timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
  latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
- height, width = downscale_height_and_width(height, width, self.movq_scale_factor)
  latents = self.prepare_latents(
  latents, latent_timestep, batch_size, num_images_per_prompt, image_embeds.dtype, device, generator
  )
@@ -383,21 +367,9 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
             if XLA_AVAILABLE:
                 xm.mark_step()
 
-        if output_type not in ["pt", "np", "pil", "latent"]:
-            raise ValueError(
-                f"Only the output types `pt`, `pil` ,`np` and `latent` are supported not output_type={output_type}"
-            )
-
         if not output_type == "latent":
-            # post-processing
             image = self.movq.decode(latents, force_not_quantize=True)["sample"]
-            if output_type in ["np", "pil"]:
-                image = image * 0.5 + 0.5
-                image = image.clamp(0, 1)
-                image = image.cpu().permute(0, 2, 3, 1).float().numpy()
-
-            if output_type == "pil":
-                image = self.numpy_to_pil(image)
+            image = self.image_processor.postprocess(image, output_type)
         else:
            image = latents
 
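On the output side, the manual denormalize/clamp/permute/`numpy_to_pil` sequence removed above collapses into a single `image_processor.postprocess(image, output_type)` call; the processor handles unsupported `output_type` values itself, which is presumably why the explicit `ValueError` guard could also be dropped. A rough approximation of what that call does for the common output types — not the `VaeImageProcessor` source:

```python
import torch
from diffusers.image_processor import VaeImageProcessor


def postprocess_approx(image: torch.Tensor, output_type: str = "pil"):
    # [-1, 1] -> [0, 1], as the removed in-pipeline code did.
    image = (image * 0.5 + 0.5).clamp(0, 1)
    if output_type == "pt":
        return image
    image = image.cpu().permute(0, 2, 3, 1).float().numpy()  # NCHW -> NHWC
    if output_type == "np":
        return image
    return VaeImageProcessor.numpy_to_pil(image)  # "pil"
```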
@@ -1,4 +1,4 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -343,11 +343,11 @@ class KandinskyV22InpaintPipeline(DiffusionPipeline):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
             guidance_scale (`float`, *optional*, defaults to 4.0):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -386,7 +386,7 @@ class KandinskyV22InpaintPipeline(DiffusionPipeline):
                 "As of diffusers==0.19.0 this behavior has been inverted. Now white pixels are repainted and black pixels are preserved. "
                 "This way, Kandinsky's masking behavior is aligned with Stable Diffusion. "
                 "THIS means that you HAVE to invert the input mask to have the same behavior as before as explained in https://github.com/huggingface/diffusers/pull/4207. "
-                "This warning will be surpressed after the first inference call and will be removed in diffusers>0.23.0"
+                "This warning will be suppressed after the first inference call and will be removed in diffusers>0.23.0"
             )
             self._warn_has_been_called = True
 
@@ -179,11 +179,11 @@ class KandinskyV22PriorPipeline(DiffusionPipeline):
                 The prompt not to guide the image generation. Ignored when not using guidance (i.e., ignored if
                 `guidance_scale` is less than `1`).
             guidance_scale (`float`, *optional*, defaults to 4.0):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
 
         Examples:
 
@@ -414,11 +414,11 @@ class KandinskyV22PriorPipeline(DiffusionPipeline):
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
             guidance_scale (`float`, *optional*, defaults to 4.0):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             output_type (`str`, *optional*, defaults to `"pt"`):
                 The output format of the generate image. Choose between: `"np"` (`np.array`) or `"pt"`
                 (`torch.Tensor`).
@@ -203,11 +203,11 @@ class KandinskyV22PriorEmb2EmbPipeline(DiffusionPipeline):
                 The prompt not to guide the image generation. Ignored when not using guidance (i.e., ignored if
                 `guidance_scale` is less than `1`).
             guidance_scale (`float`, *optional*, defaults to 4.0):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
 
         Examples:
 
@@ -441,11 +441,11 @@ class KandinskyV22PriorEmb2EmbPipeline(DiffusionPipeline):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
             guidance_scale (`float`, *optional*, defaults to 4.0):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             output_type (`str`, *optional*, defaults to `"pt"`):
                 The output format of the generate image. Choose between: `"np"` (`np.array`) or `"pt"`
                 (`torch.Tensor`).
@@ -368,11 +368,11 @@ class Kandinsky3Pipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
                 Custom timesteps to use for the denoising process. If not defined, equal spaced `num_inference_steps`
                 timesteps are used. Must be in descending order.
             guidance_scale (`float`, *optional*, defaults to 3.0):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             negative_prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -384,8 +384,8 @@ class Kandinsky3Pipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
             width (`int`, *optional*, defaults to self.unet.config.sample_size):
                 The width in pixels of the generated image.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
-                [`schedulers.DDIMScheduler`], will be ignored for others.
+                Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+                applies to [`schedulers.DDIMScheduler`], will be ignored for others.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.